diff options
Diffstat (limited to 'src/video_core')
98 files changed, 4441 insertions, 3228 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f7febd6a2..c0e8f6ab1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -156,11 +156,12 @@ add_library(video_core STATIC if (ENABLE_VULKAN) target_sources(video_core PRIVATE - renderer_vulkan/declarations.h renderer_vulkan/fixed_pipeline_state.cpp renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/nsight_aftermath_tracker.cpp + renderer_vulkan/nsight_aftermath_tracker.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -214,19 +215,30 @@ if (ENABLE_VULKAN) renderer_vulkan/wrapper.cpp renderer_vulkan/wrapper.h ) - - target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) - target_compile_definitions(video_core PRIVATE HAS_VULKAN) endif() create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PRIVATE glad) + if (ENABLE_VULKAN) + target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) + target_compile_definitions(video_core PRIVATE HAS_VULKAN) target_link_libraries(video_core PRIVATE sirit) endif() +if (ENABLE_NSIGHT_AFTERMATH) + if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) + message(ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided") + endif() + if (NOT WIN32) + message(ERROR "Nsight Aftermath doesn't support non-Windows platforms") + endif() + target_compile_definitions(video_core PRIVATE HAS_NSIGHT_AFTERMATH) + target_include_directories(video_core PRIVATE "$ENV{NSIGHT_AFTERMATH_SDK}/include") +endif() + if (MSVC) target_compile_options(video_core PRIVATE /we4267) else() diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b57c0d4d4..83e7a1cde 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -29,10 +29,10 @@ namespace VideoCommon { using MapInterval = std::shared_ptr<MapIntervalBase>; -template <typename TBuffer, typename TBufferType, typename StreamBuffer> +template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> class BufferCache { public: - using BufferInfo = std::pair<const TBufferType*, u64>; + using BufferInfo = std::pair<BufferType, u64>; BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool is_written = false, bool use_fast_cbuf = false) { @@ -89,9 +89,7 @@ public: } } - const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr)); - - return {ToHandle(block), offset}; + return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. @@ -156,7 +154,7 @@ public: } } - virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; + virtual BufferType GetEmptyBuffer(std::size_t size) = 0; protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, @@ -166,19 +164,19 @@ protected: ~BufferCache() = default; - virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; + virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; virtual void WriteBarrier() = 0; - virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; + virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, const u8* data) = 0; - virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, u8* data) = 0; - virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, + virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) = 0; virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { @@ -221,9 +219,8 @@ private: return std::make_shared<MapIntervalBase>(start, end, gpu_addr); } - MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, + MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, const std::size_t size) { - std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { auto& memory_manager = system.GPU().MemoryManager(); @@ -272,7 +269,7 @@ private: return new_map; } - void UpdateBlock(const TBuffer& block, VAddr start, VAddr end, + void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, std::vector<MapInterval>& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; @@ -313,7 +310,7 @@ private: void FlushMap(MapInterval map) { std::size_t size = map->GetEnd() - map->GetStart(); - TBuffer block = blocks[map->GetStart() >> block_page_bits]; + OwnerBuffer block = blocks[map->GetStart() >> block_page_bits]; staging_buffer.resize(size); DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); @@ -328,7 +325,7 @@ private: buffer_ptr += size; buffer_offset += size; - return {&stream_buffer_handle, uploaded_offset}; + return {stream_buffer_handle, uploaded_offset}; } void AlignBuffer(std::size_t alignment) { @@ -338,11 +335,11 @@ private: buffer_offset = offset_aligned; } - TBuffer EnlargeBlock(TBuffer buffer) { + OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { const std::size_t old_size = buffer->GetSize(); const std::size_t new_size = old_size + block_page_size; const VAddr cpu_addr = buffer->GetCpuAddr(); - TBuffer new_buffer = CreateBlock(cpu_addr, new_size); + OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); CopyBlock(buffer, new_buffer, 0, 0, old_size); buffer->SetEpoch(epoch); pending_destruction.push_back(buffer); @@ -356,14 +353,14 @@ private: return new_buffer; } - TBuffer MergeBlocks(TBuffer first, TBuffer second) { + OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { const std::size_t size_1 = first->GetSize(); const std::size_t size_2 = second->GetSize(); const VAddr first_addr = first->GetCpuAddr(); const VAddr second_addr = second->GetCpuAddr(); const VAddr new_addr = std::min(first_addr, second_addr); const std::size_t new_size = size_1 + size_2; - TBuffer new_buffer = CreateBlock(new_addr, new_size); + OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); first->SetEpoch(epoch); @@ -380,8 +377,8 @@ private: return new_buffer; } - TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { - TBuffer found{}; + OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { + OwnerBuffer found; const VAddr cpu_addr_end = cpu_addr + size - 1; u64 page_start = cpu_addr >> block_page_bits; const u64 page_end = cpu_addr_end >> block_page_bits; @@ -457,7 +454,7 @@ private: Core::System& system; std::unique_ptr<StreamBuffer> stream_buffer; - TBufferType stream_buffer_handle{}; + BufferType stream_buffer_handle{}; bool invalidated = false; @@ -475,9 +472,9 @@ private: static constexpr u64 block_page_bits = 21; static constexpr u64 block_page_size = 1ULL << block_page_bits; - std::unordered_map<u64, TBuffer> blocks; + std::unordered_map<u64, OwnerBuffer> blocks; - std::list<TBuffer> pending_destruction; + std::list<OwnerBuffer> pending_destruction; u64 epoch = 0; u64 modified_ticks = 0; diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 713c14182..0b77afc71 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -12,7 +12,7 @@ namespace Tegra { -DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} +DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} DmaPusher::~DmaPusher() = default; @@ -26,7 +26,7 @@ void DmaPusher::DispatchCalls() { dma_pushbuffer_subindex = 0; - while (Core::System::GetInstance().IsPoweredOn()) { + while (system.IsPoweredOn()) { if (!Step()) { break; } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 6ab06518f..d6188614a 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -10,6 +10,10 @@ #include "common/bit_field.h" #include "common/common_types.h" +namespace Core { +class System; +} + namespace Tegra { enum class SubmissionMode : u32 { @@ -56,7 +60,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; */ class DmaPusher { public: - explicit DmaPusher(GPU& gpu); + explicit DmaPusher(Core::System& system, GPU& gpu); ~DmaPusher(); void Push(CommandList&& entries) { @@ -72,8 +76,6 @@ private: void CallMethod(u32 argument) const; - GPU& gpu; - std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed @@ -92,6 +94,9 @@ private: GPUVAddr dma_mget{}; ///< main pushbuffer last read address bool ib_enable{true}; ///< IB mode enabled + + GPU& gpu; + Core::System& system; }; } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ba63b44b4..baa74ad4c 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -92,6 +92,10 @@ void Maxwell3D::InitializeRegisterDefaults() { color_mask.A.Assign(1); } + for (auto& format : regs.vertex_attrib_format) { + format.constant.Assign(1); + } + // NVN games expect these values to be enabled at boot regs.rasterize_enable = 1; regs.rt_separate_frag_data = 1; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 2977a7d81..59d5752d2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -303,6 +303,10 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } + bool IsConstant() const { + return constant; + } + bool IsValid() const { return size != Size::Invalid; } @@ -1145,7 +1149,7 @@ public: /// Returns whether the vertex array specified by index is supposed to be /// accessed per instance or not. - bool IsInstancingEnabled(u32 index) const { + bool IsInstancingEnabled(std::size_t index) const { return is_instanced[index]; } } instanced_arrays; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c66c66f6c..7231597d4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1006,6 +1006,12 @@ union Instruction { } stg; union { + BitField<23, 3, AtomicOp> operation; + BitField<48, 1, u64> extended; + BitField<20, 3, GlobalAtomicType> type; + } red; + + union { BitField<52, 4, AtomicOp> operation; BitField<49, 3, GlobalAtomicType> type; BitField<28, 20, s64> offset; @@ -1501,7 +1507,7 @@ union Instruction { TextureType GetTextureType() const { // The TLDS instruction has a weird encoding for the texture type. - if (texture_info >= 0 && texture_info <= 1) { + if (texture_info <= 1) { return TextureType::Texture1D; } if (texture_info == 2 || texture_info == 8 || texture_info == 12 || @@ -1787,6 +1793,7 @@ public: ST_S, ST, // Store in generic memory STG, // Store in global memory + RED, // Reduction operation ATOM, // Atomic operation on global memory ATOMS, // Atomic operation on shared memory AL2P, // Transforms attribute memory into physical memory @@ -1871,7 +1878,8 @@ public: ICMP_R, ICMP_CR, ICMP_IMM, - FCMP_R, + FCMP_RR, + FCMP_RC, MUFU, // Multi-Function Operator RRO_C, // Range Reduction Operator RRO_R, @@ -2096,6 +2104,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("1110101111111---", Id::RED, Type::Memory, "RED"), INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), @@ -2179,7 +2188,8 @@ private: INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"), + INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), + INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8acf2eda2..a606f4abd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -27,7 +27,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { auto& rasterizer{renderer->Rasterizer()}; memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); - dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); + dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index cc434faf7..20e73a37e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -12,8 +12,9 @@ namespace VideoCommon { GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, std::unique_ptr<Core::Frontend::GraphicsContext>&& context) - : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)), - cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {} + : GPU(system, std::move(renderer_), true), gpu_thread{system}, + cpu_context(renderer->GetRenderWindow().CreateSharedContext()), + gpu_context(std::move(context)) {} GPUAsynch::~GPUAsynch() = default; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index a3389d0d2..fd49bc2a9 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,8 +6,8 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -17,10 +17,7 @@ namespace Tegra { MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer}, system{system} { - std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); - std::fill(page_table.attributes.begin(), page_table.attributes.end(), - Common::PageType::Unmapped); - page_table.Resize(address_space_width); + page_table.Resize(address_space_width, page_bits, false); // Initialize the map with a single free region covering the entire managed space. VirtualMemoryArea initial_vma; @@ -55,9 +52,9 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::DeviceMapped) + ->PageTable() + .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::DeviceShared) .IsSuccess()); return gpu_addr; @@ -70,9 +67,9 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::DeviceMapped) + ->PageTable() + .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::DeviceShared) .IsSuccess()); return gpu_addr; } @@ -89,9 +86,10 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::None) + ->PageTable() + .SetMemoryAttribute(cpu_addr.value(), size, + Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::None) .IsSuccess()); return gpu_addr; @@ -147,16 +145,8 @@ T MemoryManager::Read(GPUVAddr addr) const { return value; } - switch (page_table.attributes[addr >> page_bits]) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr); - return 0; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr); - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); + return {}; } @@ -173,17 +163,7 @@ void MemoryManager::Write(GPUVAddr addr, T data) { return; } - switch (page_table.attributes[addr >> page_bits]) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast<u32>(data), addr); - return; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr); - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); } template u8 MemoryManager::Read<u8>(GPUVAddr addr) const; @@ -249,18 +229,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; - // Flush must happen on the rasterizer interface, such that memory is always synchronous - // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(src_addr, copy_amount); - memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. + rasterizer.FlushRegion(src_addr, copy_amount); + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); page_index++; page_offset = 0; @@ -305,18 +278,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; - // Invalidate must happen on the rasterizer interface, such that memory is always - // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(dest_addr, copy_amount); - memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; + // Invalidate must happen on the rasterizer interface, such that memory is always + // synchronous when it is written (even when in asynchronous GPU mode). + rasterizer.InvalidateRegion(dest_addr, copy_amount); + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); page_index++; page_offset = 0; @@ -362,8 +328,8 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; - const std::size_t page = (addr & Memory::PAGE_MASK) + size; - return page <= Memory::PAGE_SIZE; + const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size; + return page <= Core::Memory::PAGE_SIZE; } void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, @@ -375,12 +341,13 @@ void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageTy ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); - std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); - if (memory == nullptr) { - std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory); - std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end, - backing_addr); + while (base != end) { + page_table.pointers[base] = nullptr; + page_table.backing_addr[base] = 0; + + base += 1; + } } else { while (base != end) { page_table.pointers[base] = memory; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 0d9468535..0ddd52d5a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -179,7 +179,7 @@ private: /// End of address space, based on address space in bits. static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; - Common::BackingPageTable page_table{page_bits}; + Common::PageTable page_table; VMAMap vma_map; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index d01db97da..53622ca05 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -23,15 +23,15 @@ constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } // Anonymous namespace -RasterizerAccelerated::RasterizerAccelerated(Memory::Memory& cpu_memory_) +RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_) : cpu_memory{cpu_memory_} {} RasterizerAccelerated::~RasterizerAccelerated() = default; void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { std::lock_guard lock{pages_mutex}; - const u64 page_start{addr >> Memory::PAGE_BITS}; - const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; + const u64 page_start{addr >> Core::Memory::PAGE_BITS}; + const u64 page_end{(addr + size + Core::Memory::PAGE_SIZE - 1) >> Core::Memory::PAGE_BITS}; // Interval maps will erase segments if count reaches 0, so if delta is negative we have to // subtract after iterating @@ -44,8 +44,8 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del const auto interval = pair.first & pages_interval; const int count = pair.second; - const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const VAddr interval_start_addr = boost::icl::first(interval) << Core::Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Core::Memory::PAGE_BITS; const u64 interval_size = interval_end_addr - interval_start_addr; if (delta > 0 && count == delta) { diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 315798e7c..91866d7dd 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "video_core/rasterizer_interface.h" -namespace Memory { +namespace Core::Memory { class Memory; } @@ -20,7 +20,7 @@ namespace VideoCore { /// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface. class RasterizerAccelerated : public RasterizerInterface { public: - explicit RasterizerAccelerated(Memory::Memory& cpu_memory_); + explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); ~RasterizerAccelerated() override; void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; @@ -30,7 +30,7 @@ private: CachedPageMap cached_pages; std::mutex pages_mutex; - Memory::Memory& cpu_memory; + Core::Memory::Memory& cpu_memory; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 4eb37a96c..cb5792407 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -55,33 +55,31 @@ void OGLBufferCache::WriteBarrier() { glMemoryBarrier(GL_ALL_BARRIER_BITS); } -const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { +GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { return buffer->GetHandle(); } -const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { - static const GLuint null_buffer = 0; - return &null_buffer; +GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { + return 0; } void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) { - glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), + glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), data); } void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, u8* data) { MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), + glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), data); } void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), - static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset), - static_cast<GLsizeiptr>(size)); + glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset), + static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); } OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, @@ -89,7 +87,7 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); const GLuint& cbuf = cbufs[cbuf_cursor++]; glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); - return {&cbuf, 0}; + return {cbuf, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index d94a11252..a74817857 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -34,12 +34,12 @@ public: explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); ~CachedBufferBlock(); - const GLuint* GetHandle() const { - return &gl_buffer.handle; + GLuint GetHandle() const { + return gl_buffer.handle; } private: - OGLBuffer gl_buffer{}; + OGLBuffer gl_buffer; }; class OGLBufferCache final : public GenericBufferCache { @@ -48,7 +48,7 @@ public: const Device& device, std::size_t stream_size); ~OGLBufferCache(); - const GLuint* GetEmptyBuffer(std::size_t) override; + GLuint GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; @@ -57,9 +57,9 @@ public: protected: Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; - void WriteBarrier() override; + GLuint ToHandle(const Buffer& buffer) override; - const GLuint* ToHandle(const Buffer& buffer) override; + void WriteBarrier() override; void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c286502ba..d83dca25a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -87,7 +87,7 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; - static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4}; + static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4}; const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index f12e9f55f..d7ba57aca 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -94,9 +94,9 @@ CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { - VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); cache = rhs.cache; type = rhs.type; + CachedQueryBase<HostCounter>::operator=(std::move(rhs)); return *this; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f31d960c7..175374f0d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() { const auto attrib = gpu.regs.vertex_attrib_format[index]; const auto gl_index = static_cast<GLuint>(index); - // Ignore invalid attributes. - if (!attrib.IsValid()) { + // Disable constant attributes. + if (attrib.IsConstant()) { glDisableVertexAttribArray(gl_index); continue; } @@ -188,10 +188,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { ASSERT(end > start); const u64 size = end - start + 1; const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - - // Bind the vertex array to the buffer at the current offset. - vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer, - vertex_buffer_offset, vertex_array.stride); + glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, + vertex_array.stride); } } @@ -222,7 +220,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - vertex_array_pushbuffer.SetIndexBuffer(buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); return offset; } @@ -524,7 +522,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Prepare vertex array format. SetupVertexFormat(); - vertex_array_pushbuffer.Setup(); // Upload vertex and index data. SetupVertexBuffer(); @@ -534,17 +531,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { index_buffer_offset = SetupIndexBuffer(); } - // Prepare packed bindings. - bind_ubo_pushbuffer.Setup(); - bind_ssbo_pushbuffer.Setup(); - // Setup emulation uniform buffer. GLShader::MaxwellUniformData ubo; ubo.SetFromRegs(gpu); const auto [buffer, offset] = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset, - static_cast<GLsizeiptr>(sizeof(ubo))); + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + static_cast<GLsizeiptr>(sizeof(ubo))); // Setup shaders and their used resources. texture_cache.GuardSamplers(true); @@ -557,11 +550,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Signal the buffer cache that we are not going to upload more things. buffer_cache.Unmap(); - // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. - vertex_array_pushbuffer.Bind(); - bind_ubo_pushbuffer.Bind(); - bind_ssbo_pushbuffer.Bind(); - program_manager.BindGraphicsPipeline(); if (texture_cache.TextureBarrier()) { @@ -630,17 +618,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); - bind_ubo_pushbuffer.Setup(); - bind_ssbo_pushbuffer.Setup(); - SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); buffer_cache.Unmap(); - bind_ubo_pushbuffer.Bind(); - bind_ssbo_pushbuffer.Bind(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; @@ -771,8 +753,8 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const const ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, - sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, + sizeof(float)); return; } @@ -783,7 +765,7 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, device.HasFastBufferSubData()); - bind_ubo_pushbuffer.Push(binding, cbuf, offset, size); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { @@ -819,7 +801,8 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto [ssbo, buffer_offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); - bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + static_cast<GLsizeiptr>(size)); } void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { @@ -1432,7 +1415,7 @@ void RasterizerOpenGL::EndTransformFeedback() { const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); + glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 435da4425..caea174d2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -231,9 +231,7 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker}; - BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; - BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + GLint vertex_binding = 0; std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> transform_feedback_buffers; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 1f1f01313..0cd3ad7e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -484,7 +484,7 @@ private: code.AddLine("switch (jmp_to) {{"); for (const auto& pair : ir.GetBasicBlocks()) { - const auto [address, bb] = pair; + const auto& [address, bb] = pair; code.AddLine("case 0x{:X}U: {{", address); ++code.scope; @@ -835,7 +835,8 @@ private: void DeclareConstantBuffers() { u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, cbuf] : ir.GetConstantBuffers()) { + for (const auto& buffers : ir.GetConstantBuffers()) { + const auto index = buffers.first; code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, GetConstBufferBlock(index)); code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); @@ -1144,6 +1145,7 @@ private: return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; default: UNREACHABLE(); + return {"0", Type::Int}; } case Attribute::Index::FrontColor: return {"gl_Color"s + GetSwizzle(element), Type::Float}; @@ -1482,8 +1484,8 @@ private: dy += '('; for (std::size_t index = 0; index < components; ++index) { - const auto operand_x{derivates.at(index * 2)}; - const auto operand_y{derivates.at(index * 2 + 1)}; + const auto& operand_x{derivates.at(index * 2)}; + const auto& operand_y{derivates.at(index * 2 + 1)}; dx += Visit(operand_x).AsFloat(); dy += Visit(operand_y).AsFloat(); @@ -1821,13 +1823,15 @@ private: Expression HMergeH0(Operation operation) { const std::string dest = VisitOperand(operation, 0).AsUint(); const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("bitfieldInsert({}, {}, 0, 16)", dest, src), Type::Uint}; + return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), + Type::HalfFloat}; } Expression HMergeH1(Operation operation) { const std::string dest = VisitOperand(operation, 0).AsUint(); const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("bitfieldInsert({}, {}, 16, 16)", dest, src), Type::Uint}; + return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), + Type::HalfFloat}; } Expression HPack2(Operation operation) { @@ -2117,8 +2121,14 @@ private: return {}; } return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), - Visit(operation[1]).As(type)), - type}; + Visit(operation[1]).AsUint()), + Type::Uint}; + } + + template <const std::string_view& opname, Type type> + Expression Reduce(Operation operation) { + code.AddLine("{};", Atomic<opname, type>(operation).GetCode()); + return {}; } Expression Branch(Operation operation) { @@ -2477,6 +2487,20 @@ private: &GLSLDecompiler::Atomic<Func::Or, Type::Int>, &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, + &GLSLDecompiler::Reduce<Func::Add, Type::Uint>, + &GLSLDecompiler::Reduce<Func::Min, Type::Uint>, + &GLSLDecompiler::Reduce<Func::Max, Type::Uint>, + &GLSLDecompiler::Reduce<Func::And, Type::Uint>, + &GLSLDecompiler::Reduce<Func::Or, Type::Uint>, + &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>, + + &GLSLDecompiler::Reduce<Func::Add, Type::Int>, + &GLSLDecompiler::Reduce<Func::Min, Type::Int>, + &GLSLDecompiler::Reduce<Func::Max, Type::Int>, + &GLSLDecompiler::Reduce<Func::And, Type::Int>, + &GLSLDecompiler::Reduce<Func::Or, Type::Int>, + &GLSLDecompiler::Reduce<Func::Xor, Type::Int>, + &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 0b4d999d7..2729d1265 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -417,7 +417,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { switch (params.target) { case SurfaceTarget::Texture2DArray: - glFramebufferTexture(target, attachment, GetTexture(), params.base_level); + glFramebufferTexture(target, attachment, GetTexture(), 0); break; default: UNIMPLEMENTED(); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 89f0e04ef..2c0c77c28 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -191,6 +191,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Linear: return GL_LINEAR_MIPMAP_LINEAR; } + break; } case Tegra::Texture::TextureFilter::Nearest: { switch (mip_filter_mode) { @@ -201,6 +202,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Linear: return GL_NEAREST_MIPMAP_LINEAR; } + break; } } LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index f1a28cc21..b2a179746 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -315,8 +315,8 @@ public: RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, Core::Frontend::GraphicsContext& context) - : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, - frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {} + : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, + has_debug_tool{HasDebugTool()} {} RendererOpenGL::~RendererOpenGL() = default; diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index b751086fa..6d7bb16b2 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -14,68 +14,6 @@ namespace OpenGL { -struct VertexArrayPushBuffer::Entry { - GLuint binding_index{}; - const GLuint* buffer{}; - GLintptr offset{}; - GLsizei stride{}; -}; - -VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker) - : state_tracker{state_tracker} {} - -VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; - -void VertexArrayPushBuffer::Setup() { - index_buffer = nullptr; - vertex_buffers.clear(); -} - -void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { - index_buffer = buffer; -} - -void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, - GLintptr offset, GLsizei stride) { - vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); -} - -void VertexArrayPushBuffer::Bind() { - if (index_buffer) { - state_tracker.BindIndexBuffer(*index_buffer); - } - - for (const auto& entry : vertex_buffers) { - glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride); - } -} - -struct BindBuffersRangePushBuffer::Entry { - GLuint binding; - const GLuint* buffer; - GLintptr offset; - GLsizeiptr size; -}; - -BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} - -BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; - -void BindBuffersRangePushBuffer::Setup() { - entries.clear(); -} - -void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset, - GLsizeiptr size) { - entries.push_back(Entry{binding, buffer, offset, size}); -} - -void BindBuffersRangePushBuffer::Bind() { - for (const Entry& entry : entries) { - glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size); - } -} - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { if (!GLAD_GL_KHR_debug) { // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 47ee3177b..9c09ee12c 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -11,49 +11,6 @@ namespace OpenGL { -class StateTracker; - -class VertexArrayPushBuffer final { -public: - explicit VertexArrayPushBuffer(StateTracker& state_tracker); - ~VertexArrayPushBuffer(); - - void Setup(); - - void SetIndexBuffer(const GLuint* buffer); - - void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, - GLsizei stride); - - void Bind(); - -private: - struct Entry; - - StateTracker& state_tracker; - - const GLuint* index_buffer{}; - std::vector<Entry> vertex_buffers; -}; - -class BindBuffersRangePushBuffer final { -public: - explicit BindBuffersRangePushBuffer(GLenum target); - ~BindBuffersRangePushBuffer(); - - void Setup(); - - void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size); - - void Bind(); - -private: - struct Entry; - - GLenum target; - std::vector<Entry> entries; -}; - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h deleted file mode 100644 index 89a035ca4..000000000 --- a/src/video_core/renderer_vulkan/declarations.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -namespace vk { -class DispatchLoaderDynamic; -} - -namespace Vulkan { -constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr; -} - -#define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld) -#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0 -#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 -#include <vulkan/vulkan.hpp> - -namespace Vulkan { - -// vulkan.hpp unique handlers use DispatchLoaderStatic -template <typename T> -using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>; - -using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>; -using UniqueBuffer = UniqueHandle<vk::Buffer>; -using UniqueBufferView = UniqueHandle<vk::BufferView>; -using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>; -using UniqueCommandPool = UniqueHandle<vk::CommandPool>; -using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>; -using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>; -using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>; -using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>; -using UniqueDevice = UniqueHandle<vk::Device>; -using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>; -using UniqueEvent = UniqueHandle<vk::Event>; -using UniqueFence = UniqueHandle<vk::Fence>; -using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; -using UniqueImage = UniqueHandle<vk::Image>; -using UniqueImageView = UniqueHandle<vk::ImageView>; -using UniqueInstance = UniqueHandle<vk::Instance>; -using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; -using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; -using UniquePipeline = UniqueHandle<vk::Pipeline>; -using UniquePipelineCache = UniqueHandle<vk::PipelineCache>; -using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>; -using UniqueQueryPool = UniqueHandle<vk::QueryPool>; -using UniqueRenderPass = UniqueHandle<vk::RenderPass>; -using UniqueSampler = UniqueHandle<vk::Sampler>; -using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; -using UniqueSemaphore = UniqueHandle<vk::Semaphore>; -using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; -using UniqueSurfaceKHR = UniqueHandle<vk::SurfaceKHR>; -using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; -using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; -using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>; -using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 2bb376555..be1c31978 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -2,10 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> #include <tuple> #include <boost/functional/hash.hpp> +#include "common/cityhash.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" @@ -13,289 +15,352 @@ namespace Vulkan { namespace { -constexpr FixedPipelineState::DepthStencil GetDepthStencilState(const Maxwell& regs) { - const FixedPipelineState::StencilFace front_stencil( - regs.stencil_front_op_fail, regs.stencil_front_op_zfail, regs.stencil_front_op_zpass, - regs.stencil_front_func_func); - const FixedPipelineState::StencilFace back_stencil = - regs.stencil_two_side_enable - ? FixedPipelineState::StencilFace(regs.stencil_back_op_fail, regs.stencil_back_op_zfail, - regs.stencil_back_op_zpass, - regs.stencil_back_func_func) - : front_stencil; - return FixedPipelineState::DepthStencil( - regs.depth_test_enable == 1, regs.depth_write_enabled == 1, regs.depth_bounds_enable == 1, - regs.stencil_enable == 1, regs.depth_test_func, front_stencil, back_stencil); -} - -constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) { - return FixedPipelineState::InputAssembly( - regs.draw.topology, regs.primitive_restart.enabled, - regs.draw.topology == Maxwell::PrimitiveTopology::Points ? regs.point_size : 0.0f); -} - -constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState( - const Maxwell& regs, std::size_t render_target) { - const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : render_target]; - const std::array components = {mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0}; - - const FixedPipelineState::BlendingAttachment default_blending( - false, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One, - Maxwell::Blend::Factor::Zero, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One, - Maxwell::Blend::Factor::Zero, components); - if (render_target >= regs.rt_control.count) { - return default_blending; - } +constexpr std::size_t POINT = 0; +constexpr std::size_t LINE = 1; +constexpr std::size_t POLYGON = 2; +constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { + POINT, // Points + LINE, // Lines + LINE, // LineLoop + LINE, // LineStrip + POLYGON, // Triangles + POLYGON, // TriangleStrip + POLYGON, // TriangleFan + POLYGON, // Quads + POLYGON, // QuadStrip + POLYGON, // Polygon + LINE, // LinesAdjacency + LINE, // LineStripAdjacency + POLYGON, // TrianglesAdjacency + POLYGON, // TriangleStripAdjacency + POLYGON, // Patches +}; - if (!regs.independent_blend_enable) { - const auto& src = regs.blend; - if (!src.enable[render_target]) { - return default_blending; - } - return FixedPipelineState::BlendingAttachment( - true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a, - src.factor_source_a, src.factor_dest_a, components); - } +} // Anonymous namespace - if (!regs.blend.enable[render_target]) { - return default_blending; +void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { + raw = 0; + front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); + front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); + front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); + front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); + if (regs.stencil_two_side_enable) { + back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); + back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); + back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); + back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); + } else { + back.action_stencil_fail.Assign(front.action_stencil_fail); + back.action_depth_fail.Assign(front.action_depth_fail); + back.action_depth_pass.Assign(front.action_depth_pass); + back.test_func.Assign(front.test_func); } - const auto& src = regs.independent_blend[render_target]; - return FixedPipelineState::BlendingAttachment( - true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a, - src.factor_source_a, src.factor_dest_a, components); + depth_test_enable.Assign(regs.depth_test_enable); + depth_write_enable.Assign(regs.depth_write_enabled); + depth_bounds_enable.Assign(regs.depth_bounds_enable); + stencil_enable.Assign(regs.stencil_enable); + depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); } -constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& regs) { - return FixedPipelineState::ColorBlending( - {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a}, - regs.rt_control.count, - {GetBlendingAttachmentState(regs, 0), GetBlendingAttachmentState(regs, 1), - GetBlendingAttachmentState(regs, 2), GetBlendingAttachmentState(regs, 3), - GetBlendingAttachmentState(regs, 4), GetBlendingAttachmentState(regs, 5), - GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)}); -} - -constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) { - return FixedPipelineState::Tessellation(regs.patch_vertices, regs.tess_mode.prim, - regs.tess_mode.spacing, regs.tess_mode.cw != 0); -} - -constexpr std::size_t Point = 0; -constexpr std::size_t Line = 1; -constexpr std::size_t Polygon = 2; -constexpr std::array PolygonOffsetEnableLUT = { - Point, // Points - Line, // Lines - Line, // LineLoop - Line, // LineStrip - Polygon, // Triangles - Polygon, // TriangleStrip - Polygon, // TriangleFan - Polygon, // Quads - Polygon, // QuadStrip - Polygon, // Polygon - Line, // LinesAdjacency - Line, // LineStripAdjacency - Polygon, // TrianglesAdjacency - Polygon, // TriangleStripAdjacency - Polygon, // Patches -}; - -constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) { +void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { + const auto& clip = regs.view_volume_clip_control; const std::array enabled_lut = {regs.polygon_offset_point_enable, regs.polygon_offset_line_enable, regs.polygon_offset_fill_enable}; - const auto topology = static_cast<std::size_t>(regs.draw.topology.Value()); - const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]]; + const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); - const auto& clip = regs.view_volume_clip_control; - const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; - - Maxwell::FrontFace front_face = regs.front_face; + u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0 && regs.viewport_transform[0].scale_y > 0.0f) { - if (front_face == Maxwell::FrontFace::CounterClockWise) - front_face = Maxwell::FrontFace::ClockWise; - else if (front_face == Maxwell::FrontFace::ClockWise) - front_face = Maxwell::FrontFace::CounterClockWise; + // Flip front face + packed_front_face = 1 - packed_front_face; } - const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; - return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled, - depth_clamp_enabled, gl_ndc, regs.cull_face, front_face); + raw = 0; + topology.Assign(topology_index); + primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); + cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); + depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); + depth_clamp_enable.Assign(clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1 ? 1 : 0); + ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); + cull_face.Assign(PackCullFace(regs.cull_face)); + front_face.Assign(packed_front_face); + polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front)); + patch_control_points_minus_one.Assign(regs.patch_vertices - 1); + tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value())); + tessellation_spacing.Assign(static_cast<u32>(regs.tess_mode.spacing.Value())); + tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); + logic_op.Assign(PackLogicOp(regs.logic_op.operation)); + std::memcpy(&point_size, ®s.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast } -} // Anonymous namespace - -std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept { - return (index << stride) ^ divisor; +void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept { + for (std::size_t index = 0; index < std::size(attachments); ++index) { + attachments[index].Fill(regs, index); + } } -bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept { - return std::tie(index, stride, divisor) == std::tie(rhs.index, rhs.stride, rhs.divisor); -} +void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { + const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; + + raw = 0; + mask_r.Assign(mask.R); + mask_g.Assign(mask.G); + mask_b.Assign(mask.B); + mask_a.Assign(mask.A); + + // TODO: C++20 Use templated lambda to deduplicate code + + if (!regs.independent_blend_enable) { + const auto& src = regs.blend; + if (!src.enable[index]) { + return; + } + equation_rgb.Assign(PackBlendEquation(src.equation_rgb)); + equation_a.Assign(PackBlendEquation(src.equation_a)); + factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb)); + factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb)); + factor_source_a.Assign(PackBlendFactor(src.factor_source_a)); + factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a)); + enable.Assign(1); + return; + } -std::size_t FixedPipelineState::VertexAttribute::Hash() const noexcept { - return static_cast<std::size_t>(index) ^ (static_cast<std::size_t>(buffer) << 13) ^ - (static_cast<std::size_t>(type) << 22) ^ (static_cast<std::size_t>(size) << 31) ^ - (static_cast<std::size_t>(offset) << 36); + if (!regs.blend.enable[index]) { + return; + } + const auto& src = regs.independent_blend[index]; + equation_rgb.Assign(PackBlendEquation(src.equation_rgb)); + equation_a.Assign(PackBlendEquation(src.equation_a)); + factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb)); + factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb)); + factor_source_a.Assign(PackBlendFactor(src.factor_source_a)); + factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a)); + enable.Assign(1); } -bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept { - return std::tie(index, buffer, type, size, offset) == - std::tie(rhs.index, rhs.buffer, rhs.type, rhs.size, rhs.offset); +std::size_t FixedPipelineState::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); + return static_cast<std::size_t>(hash); } -std::size_t FixedPipelineState::StencilFace::Hash() const noexcept { - return static_cast<std::size_t>(action_stencil_fail) ^ - (static_cast<std::size_t>(action_depth_fail) << 4) ^ - (static_cast<std::size_t>(action_depth_fail) << 20) ^ - (static_cast<std::size_t>(action_depth_pass) << 36); +bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; } -bool FixedPipelineState::StencilFace::operator==(const StencilFace& rhs) const noexcept { - return std::tie(action_stencil_fail, action_depth_fail, action_depth_pass, test_func) == - std::tie(rhs.action_stencil_fail, rhs.action_depth_fail, rhs.action_depth_pass, - rhs.test_func); +FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { + FixedPipelineState fixed_state; + fixed_state.rasterizer.Fill(regs); + fixed_state.depth_stencil.Fill(regs); + fixed_state.color_blending.Fill(regs); + fixed_state.padding = {}; + return fixed_state; } -std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept { - return static_cast<std::size_t>(enable) ^ (static_cast<std::size_t>(rgb_equation) << 5) ^ - (static_cast<std::size_t>(src_rgb_func) << 10) ^ - (static_cast<std::size_t>(dst_rgb_func) << 15) ^ - (static_cast<std::size_t>(a_equation) << 20) ^ - (static_cast<std::size_t>(src_a_func) << 25) ^ - (static_cast<std::size_t>(dst_a_func) << 30) ^ - (static_cast<std::size_t>(components[0]) << 35) ^ - (static_cast<std::size_t>(components[1]) << 36) ^ - (static_cast<std::size_t>(components[2]) << 37) ^ - (static_cast<std::size_t>(components[3]) << 38); +u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept { + // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8 + // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range. + // Perfect for a hash. + const u32 value = static_cast<u32>(op); + return value - (value >= 0x200 ? 0x200 : 1); } -bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const - noexcept { - return std::tie(enable, rgb_equation, src_rgb_func, dst_rgb_func, a_equation, src_a_func, - dst_a_func, components) == - std::tie(rhs.enable, rhs.rgb_equation, rhs.src_rgb_func, rhs.dst_rgb_func, - rhs.a_equation, rhs.src_a_func, rhs.dst_a_func, rhs.components); +Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept { + // Read PackComparisonOp for the logic behind this. + return static_cast<Maxwell::ComparisonOp>(packed + 1); } -std::size_t FixedPipelineState::VertexInput::Hash() const noexcept { - std::size_t hash = num_bindings ^ (num_attributes << 32); - for (std::size_t i = 0; i < num_bindings; ++i) { - boost::hash_combine(hash, bindings[i].Hash()); - } - for (std::size_t i = 0; i < num_attributes; ++i) { - boost::hash_combine(hash, attributes[i].Hash()); +u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp op) noexcept { + switch (op) { + case Maxwell::StencilOp::Keep: + case Maxwell::StencilOp::KeepOGL: + return 0; + case Maxwell::StencilOp::Zero: + case Maxwell::StencilOp::ZeroOGL: + return 1; + case Maxwell::StencilOp::Replace: + case Maxwell::StencilOp::ReplaceOGL: + return 2; + case Maxwell::StencilOp::Incr: + case Maxwell::StencilOp::IncrOGL: + return 3; + case Maxwell::StencilOp::Decr: + case Maxwell::StencilOp::DecrOGL: + return 4; + case Maxwell::StencilOp::Invert: + case Maxwell::StencilOp::InvertOGL: + return 5; + case Maxwell::StencilOp::IncrWrap: + case Maxwell::StencilOp::IncrWrapOGL: + return 6; + case Maxwell::StencilOp::DecrWrap: + case Maxwell::StencilOp::DecrWrapOGL: + return 7; } - return hash; + return 0; } -bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept { - return std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin(), - rhs.bindings.begin() + rhs.num_bindings) && - std::equal(attributes.begin(), attributes.begin() + num_attributes, - rhs.attributes.begin(), rhs.attributes.begin() + rhs.num_attributes); +Maxwell::StencilOp FixedPipelineState::UnpackStencilOp(u32 packed) noexcept { + static constexpr std::array LUT = {Maxwell::StencilOp::Keep, Maxwell::StencilOp::Zero, + Maxwell::StencilOp::Replace, Maxwell::StencilOp::Incr, + Maxwell::StencilOp::Decr, Maxwell::StencilOp::Invert, + Maxwell::StencilOp::IncrWrap, Maxwell::StencilOp::DecrWrap}; + return LUT[packed]; } -std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept { - std::size_t point_size_int = 0; - std::memcpy(&point_size_int, &point_size, sizeof(point_size)); - return (static_cast<std::size_t>(topology) << 24) ^ (point_size_int << 32) ^ - static_cast<std::size_t>(primitive_restart_enable); +u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept { + // FrontAndBack is 0x408, by substracting 0x406 in it we get 2. + // Individual cull faces are in 0x404 and 0x405, substracting 0x404 we get 0 and 1. + const u32 value = static_cast<u32>(cull); + return value - (value == 0x408 ? 0x406 : 0x404); } -bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept { - return std::tie(topology, primitive_restart_enable, point_size) == - std::tie(rhs.topology, rhs.primitive_restart_enable, rhs.point_size); +Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept { + static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back, + Maxwell::CullFace::FrontAndBack}; + return LUT[packed]; } -std::size_t FixedPipelineState::Tessellation::Hash() const noexcept { - return static_cast<std::size_t>(patch_control_points) ^ - (static_cast<std::size_t>(primitive) << 6) ^ (static_cast<std::size_t>(spacing) << 8) ^ - (static_cast<std::size_t>(clockwise) << 10); +u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept { + return static_cast<u32>(face) - 0x900; } -bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept { - return std::tie(patch_control_points, primitive, spacing, clockwise) == - std::tie(rhs.patch_control_points, rhs.primitive, rhs.spacing, rhs.clockwise); +Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept { + return static_cast<Maxwell::FrontFace>(packed + 0x900); } -std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { - return static_cast<std::size_t>(cull_enable) ^ - (static_cast<std::size_t>(depth_bias_enable) << 1) ^ - (static_cast<std::size_t>(depth_clamp_enable) << 2) ^ - (static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^ - (static_cast<std::size_t>(cull_face) << 24) ^ - (static_cast<std::size_t>(front_face) << 48); +u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept { + return static_cast<u32>(mode) - 0x1B00; } -bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { - return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one, - cull_face, front_face) == - std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable, - rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face); +Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept { + return static_cast<Maxwell::PolygonMode>(packed + 0x1B00); } -std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { - std::size_t hash = static_cast<std::size_t>(depth_test_enable) ^ - (static_cast<std::size_t>(depth_write_enable) << 1) ^ - (static_cast<std::size_t>(depth_bounds_enable) << 2) ^ - (static_cast<std::size_t>(stencil_enable) << 3) ^ - (static_cast<std::size_t>(depth_test_function) << 4); - boost::hash_combine(hash, front_stencil.Hash()); - boost::hash_combine(hash, back_stencil.Hash()); - return hash; +u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOperation op) noexcept { + return static_cast<u32>(op) - 0x1500; } -bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept { - return std::tie(depth_test_enable, depth_write_enable, depth_bounds_enable, depth_test_function, - stencil_enable, front_stencil, back_stencil) == - std::tie(rhs.depth_test_enable, rhs.depth_write_enable, rhs.depth_bounds_enable, - rhs.depth_test_function, rhs.stencil_enable, rhs.front_stencil, - rhs.back_stencil); +Maxwell::LogicOperation FixedPipelineState::UnpackLogicOp(u32 packed) noexcept { + return static_cast<Maxwell::LogicOperation>(packed + 0x1500); } -std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept { - std::size_t hash = attachments_count << 13; - for (std::size_t rt = 0; rt < static_cast<std::size_t>(attachments_count); ++rt) { - boost::hash_combine(hash, attachments[rt].Hash()); +u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept { + switch (equation) { + case Maxwell::Blend::Equation::Add: + case Maxwell::Blend::Equation::AddGL: + return 0; + case Maxwell::Blend::Equation::Subtract: + case Maxwell::Blend::Equation::SubtractGL: + return 1; + case Maxwell::Blend::Equation::ReverseSubtract: + case Maxwell::Blend::Equation::ReverseSubtractGL: + return 2; + case Maxwell::Blend::Equation::Min: + case Maxwell::Blend::Equation::MinGL: + return 3; + case Maxwell::Blend::Equation::Max: + case Maxwell::Blend::Equation::MaxGL: + return 4; } - return hash; + return 0; } -bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept { - return std::equal(attachments.begin(), attachments.begin() + attachments_count, - rhs.attachments.begin(), rhs.attachments.begin() + rhs.attachments_count); -} - -std::size_t FixedPipelineState::Hash() const noexcept { - std::size_t hash = 0; - boost::hash_combine(hash, vertex_input.Hash()); - boost::hash_combine(hash, input_assembly.Hash()); - boost::hash_combine(hash, tessellation.Hash()); - boost::hash_combine(hash, rasterizer.Hash()); - boost::hash_combine(hash, depth_stencil.Hash()); - boost::hash_combine(hash, color_blending.Hash()); - return hash; +Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept { + static constexpr std::array LUT = { + Maxwell::Blend::Equation::Add, Maxwell::Blend::Equation::Subtract, + Maxwell::Blend::Equation::ReverseSubtract, Maxwell::Blend::Equation::Min, + Maxwell::Blend::Equation::Max}; + return LUT[packed]; } -bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { - return std::tie(vertex_input, input_assembly, tessellation, rasterizer, depth_stencil, - color_blending) == std::tie(rhs.vertex_input, rhs.input_assembly, - rhs.tessellation, rhs.rasterizer, rhs.depth_stencil, - rhs.color_blending); +u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept { + switch (factor) { + case Maxwell::Blend::Factor::Zero: + case Maxwell::Blend::Factor::ZeroGL: + return 0; + case Maxwell::Blend::Factor::One: + case Maxwell::Blend::Factor::OneGL: + return 1; + case Maxwell::Blend::Factor::SourceColor: + case Maxwell::Blend::Factor::SourceColorGL: + return 2; + case Maxwell::Blend::Factor::OneMinusSourceColor: + case Maxwell::Blend::Factor::OneMinusSourceColorGL: + return 3; + case Maxwell::Blend::Factor::SourceAlpha: + case Maxwell::Blend::Factor::SourceAlphaGL: + return 4; + case Maxwell::Blend::Factor::OneMinusSourceAlpha: + case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: + return 5; + case Maxwell::Blend::Factor::DestAlpha: + case Maxwell::Blend::Factor::DestAlphaGL: + return 6; + case Maxwell::Blend::Factor::OneMinusDestAlpha: + case Maxwell::Blend::Factor::OneMinusDestAlphaGL: + return 7; + case Maxwell::Blend::Factor::DestColor: + case Maxwell::Blend::Factor::DestColorGL: + return 8; + case Maxwell::Blend::Factor::OneMinusDestColor: + case Maxwell::Blend::Factor::OneMinusDestColorGL: + return 9; + case Maxwell::Blend::Factor::SourceAlphaSaturate: + case Maxwell::Blend::Factor::SourceAlphaSaturateGL: + return 10; + case Maxwell::Blend::Factor::Source1Color: + case Maxwell::Blend::Factor::Source1ColorGL: + return 11; + case Maxwell::Blend::Factor::OneMinusSource1Color: + case Maxwell::Blend::Factor::OneMinusSource1ColorGL: + return 12; + case Maxwell::Blend::Factor::Source1Alpha: + case Maxwell::Blend::Factor::Source1AlphaGL: + return 13; + case Maxwell::Blend::Factor::OneMinusSource1Alpha: + case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: + return 14; + case Maxwell::Blend::Factor::ConstantColor: + case Maxwell::Blend::Factor::ConstantColorGL: + return 15; + case Maxwell::Blend::Factor::OneMinusConstantColor: + case Maxwell::Blend::Factor::OneMinusConstantColorGL: + return 16; + case Maxwell::Blend::Factor::ConstantAlpha: + case Maxwell::Blend::Factor::ConstantAlphaGL: + return 17; + case Maxwell::Blend::Factor::OneMinusConstantAlpha: + case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: + return 18; + } + return 0; } -FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { - FixedPipelineState fixed_state; - fixed_state.input_assembly = GetInputAssemblyState(regs); - fixed_state.tessellation = GetTessellationState(regs); - fixed_state.rasterizer = GetRasterizerState(regs); - fixed_state.depth_stencil = GetDepthStencilState(regs); - fixed_state.color_blending = GetColorBlendingState(regs); - return fixed_state; +Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept { + static constexpr std::array LUT = { + Maxwell::Blend::Factor::Zero, + Maxwell::Blend::Factor::One, + Maxwell::Blend::Factor::SourceColor, + Maxwell::Blend::Factor::OneMinusSourceColor, + Maxwell::Blend::Factor::SourceAlpha, + Maxwell::Blend::Factor::OneMinusSourceAlpha, + Maxwell::Blend::Factor::DestAlpha, + Maxwell::Blend::Factor::OneMinusDestAlpha, + Maxwell::Blend::Factor::DestColor, + Maxwell::Blend::Factor::OneMinusDestColor, + Maxwell::Blend::Factor::SourceAlphaSaturate, + Maxwell::Blend::Factor::Source1Color, + Maxwell::Blend::Factor::OneMinusSource1Color, + Maxwell::Blend::Factor::Source1Alpha, + Maxwell::Blend::Factor::OneMinusSource1Alpha, + Maxwell::Blend::Factor::ConstantColor, + Maxwell::Blend::Factor::OneMinusConstantColor, + Maxwell::Blend::Factor::ConstantAlpha, + Maxwell::Blend::Factor::OneMinusConstantAlpha, + }; + return LUT[packed]; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 4c8ba7f90..9fe6bdbf9 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -7,6 +7,7 @@ #include <array> #include <type_traits> +#include "common/bit_field.h" #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" @@ -16,93 +17,48 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -// TODO(Rodrigo): Optimize this structure. +struct alignas(32) FixedPipelineState { + static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept; + static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept; -struct FixedPipelineState { - using PixelFormat = VideoCore::Surface::PixelFormat; + static u32 PackStencilOp(Maxwell::StencilOp op) noexcept; + static Maxwell::StencilOp UnpackStencilOp(u32 packed) noexcept; - struct VertexBinding { - constexpr VertexBinding(u32 index, u32 stride, u32 divisor) - : index{index}, stride{stride}, divisor{divisor} {} - VertexBinding() = default; + static u32 PackCullFace(Maxwell::CullFace cull) noexcept; + static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept; - u32 index; - u32 stride; - u32 divisor; + static u32 PackFrontFace(Maxwell::FrontFace face) noexcept; + static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept; - std::size_t Hash() const noexcept; - - bool operator==(const VertexBinding& rhs) const noexcept; - - bool operator!=(const VertexBinding& rhs) const noexcept { - return !operator==(rhs); - } - }; - - struct VertexAttribute { - constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type, - Maxwell::VertexAttribute::Size size, u32 offset) - : index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {} - VertexAttribute() = default; - - u32 index; - u32 buffer; - Maxwell::VertexAttribute::Type type; - Maxwell::VertexAttribute::Size size; - u32 offset; - - std::size_t Hash() const noexcept; - - bool operator==(const VertexAttribute& rhs) const noexcept; - - bool operator!=(const VertexAttribute& rhs) const noexcept { - return !operator==(rhs); - } - }; - - struct StencilFace { - constexpr StencilFace(Maxwell::StencilOp action_stencil_fail, - Maxwell::StencilOp action_depth_fail, - Maxwell::StencilOp action_depth_pass, Maxwell::ComparisonOp test_func) - : action_stencil_fail{action_stencil_fail}, action_depth_fail{action_depth_fail}, - action_depth_pass{action_depth_pass}, test_func{test_func} {} - StencilFace() = default; - - Maxwell::StencilOp action_stencil_fail; - Maxwell::StencilOp action_depth_fail; - Maxwell::StencilOp action_depth_pass; - Maxwell::ComparisonOp test_func; + static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept; + static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept; - std::size_t Hash() const noexcept; + static u32 PackLogicOp(Maxwell::LogicOperation op) noexcept; + static Maxwell::LogicOperation UnpackLogicOp(u32 packed) noexcept; - bool operator==(const StencilFace& rhs) const noexcept; + static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept; + static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept; - bool operator!=(const StencilFace& rhs) const noexcept { - return !operator==(rhs); - } - }; + static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept; + static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept; struct BlendingAttachment { - constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation, - Maxwell::Blend::Factor src_rgb_func, - Maxwell::Blend::Factor dst_rgb_func, - Maxwell::Blend::Equation a_equation, - Maxwell::Blend::Factor src_a_func, - Maxwell::Blend::Factor dst_a_func, - std::array<bool, 4> components) - : enable{enable}, rgb_equation{rgb_equation}, src_rgb_func{src_rgb_func}, - dst_rgb_func{dst_rgb_func}, a_equation{a_equation}, src_a_func{src_a_func}, - dst_a_func{dst_a_func}, components{components} {} - BlendingAttachment() = default; - - bool enable; - Maxwell::Blend::Equation rgb_equation; - Maxwell::Blend::Factor src_rgb_func; - Maxwell::Blend::Factor dst_rgb_func; - Maxwell::Blend::Equation a_equation; - Maxwell::Blend::Factor src_a_func; - Maxwell::Blend::Factor dst_a_func; - std::array<bool, 4> components; + union { + u32 raw; + BitField<0, 1, u32> mask_r; + BitField<1, 1, u32> mask_g; + BitField<2, 1, u32> mask_b; + BitField<3, 1, u32> mask_a; + BitField<4, 3, u32> equation_rgb; + BitField<7, 3, u32> equation_a; + BitField<10, 5, u32> factor_source_rgb; + BitField<15, 5, u32> factor_dest_rgb; + BitField<20, 5, u32> factor_source_a; + BitField<25, 5, u32> factor_dest_a; + BitField<30, 1, u32> enable; + }; + + void Fill(const Maxwell& regs, std::size_t index); std::size_t Hash() const noexcept; @@ -111,136 +67,178 @@ struct FixedPipelineState { bool operator!=(const BlendingAttachment& rhs) const noexcept { return !operator==(rhs); } - }; - - struct VertexInput { - std::size_t num_bindings = 0; - std::size_t num_attributes = 0; - std::array<VertexBinding, Maxwell::NumVertexArrays> bindings; - std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; - - std::size_t Hash() const noexcept; - bool operator==(const VertexInput& rhs) const noexcept; + constexpr std::array<bool, 4> Mask() const noexcept { + return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; + } - bool operator!=(const VertexInput& rhs) const noexcept { - return !operator==(rhs); + Maxwell::Blend::Equation EquationRGB() const noexcept { + return UnpackBlendEquation(equation_rgb.Value()); } - }; - struct InputAssembly { - constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable, - float point_size) - : topology{topology}, primitive_restart_enable{primitive_restart_enable}, - point_size{point_size} {} - InputAssembly() = default; + Maxwell::Blend::Equation EquationAlpha() const noexcept { + return UnpackBlendEquation(equation_a.Value()); + } - Maxwell::PrimitiveTopology topology; - bool primitive_restart_enable; - float point_size; + Maxwell::Blend::Factor SourceRGBFactor() const noexcept { + return UnpackBlendFactor(factor_source_rgb.Value()); + } - std::size_t Hash() const noexcept; + Maxwell::Blend::Factor DestRGBFactor() const noexcept { + return UnpackBlendFactor(factor_dest_rgb.Value()); + } - bool operator==(const InputAssembly& rhs) const noexcept; + Maxwell::Blend::Factor SourceAlphaFactor() const noexcept { + return UnpackBlendFactor(factor_source_a.Value()); + } - bool operator!=(const InputAssembly& rhs) const noexcept { - return !operator==(rhs); + Maxwell::Blend::Factor DestAlphaFactor() const noexcept { + return UnpackBlendFactor(factor_dest_a.Value()); } }; - struct Tessellation { - constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive, - Maxwell::TessellationSpacing spacing, bool clockwise) - : patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing}, - clockwise{clockwise} {} - Tessellation() = default; - - u32 patch_control_points; - Maxwell::TessellationPrimitive primitive; - Maxwell::TessellationSpacing spacing; - bool clockwise; - - std::size_t Hash() const noexcept; - - bool operator==(const Tessellation& rhs) const noexcept; + struct VertexInput { + union Binding { + u16 raw; + BitField<0, 1, u16> enabled; + BitField<1, 12, u16> stride; + }; + + union Attribute { + u32 raw; + BitField<0, 1, u32> enabled; + BitField<1, 5, u32> buffer; + BitField<6, 14, u32> offset; + BitField<20, 3, u32> type; + BitField<23, 6, u32> size; + + constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); + } + + constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); + } + }; + + std::array<Binding, Maxwell::NumVertexArrays> bindings; + std::array<u32, Maxwell::NumVertexArrays> binding_divisors; + std::array<Attribute, Maxwell::NumVertexAttributes> attributes; + + void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept { + auto& binding = bindings[index]; + binding.raw = 0; + binding.enabled.Assign(enabled ? 1 : 0); + binding.stride.Assign(stride); + binding_divisors[index] = divisor; + } - bool operator!=(const Tessellation& rhs) const noexcept { - return !operator==(rhs); + void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset, + Maxwell::VertexAttribute::Type type, + Maxwell::VertexAttribute::Size size) noexcept { + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(enabled ? 1 : 0); + attribute.buffer.Assign(buffer); + attribute.offset.Assign(offset); + attribute.type.Assign(static_cast<u32>(type)); + attribute.size.Assign(static_cast<u32>(size)); } }; struct Rasterizer { - constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, - bool ndc_minus_one_to_one, Maxwell::CullFace cull_face, - Maxwell::FrontFace front_face) - : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, - depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, - cull_face{cull_face}, front_face{front_face} {} - Rasterizer() = default; - - bool cull_enable; - bool depth_bias_enable; - bool depth_clamp_enable; - bool ndc_minus_one_to_one; - Maxwell::CullFace cull_face; - Maxwell::FrontFace front_face; - - std::size_t Hash() const noexcept; + union { + u32 raw; + BitField<0, 4, u32> topology; + BitField<4, 1, u32> primitive_restart_enable; + BitField<5, 1, u32> cull_enable; + BitField<6, 1, u32> depth_bias_enable; + BitField<7, 1, u32> depth_clamp_enable; + BitField<8, 1, u32> ndc_minus_one_to_one; + BitField<9, 2, u32> cull_face; + BitField<11, 1, u32> front_face; + BitField<12, 2, u32> polygon_mode; + BitField<14, 5, u32> patch_control_points_minus_one; + BitField<19, 2, u32> tessellation_primitive; + BitField<21, 2, u32> tessellation_spacing; + BitField<23, 1, u32> tessellation_clockwise; + BitField<24, 1, u32> logic_op_enable; + BitField<25, 4, u32> logic_op; + }; + + // TODO(Rodrigo): Move this to push constants + u32 point_size; + + void Fill(const Maxwell& regs) noexcept; + + constexpr Maxwell::PrimitiveTopology Topology() const noexcept { + return static_cast<Maxwell::PrimitiveTopology>(topology.Value()); + } - bool operator==(const Rasterizer& rhs) const noexcept; + Maxwell::CullFace CullFace() const noexcept { + return UnpackCullFace(cull_face.Value()); + } - bool operator!=(const Rasterizer& rhs) const noexcept { - return !operator==(rhs); + Maxwell::FrontFace FrontFace() const noexcept { + return UnpackFrontFace(front_face.Value()); } }; struct DepthStencil { - constexpr DepthStencil(bool depth_test_enable, bool depth_write_enable, - bool depth_bounds_enable, bool stencil_enable, - Maxwell::ComparisonOp depth_test_function, StencilFace front_stencil, - StencilFace back_stencil) - : depth_test_enable{depth_test_enable}, depth_write_enable{depth_write_enable}, - depth_bounds_enable{depth_bounds_enable}, stencil_enable{stencil_enable}, - depth_test_function{depth_test_function}, front_stencil{front_stencil}, - back_stencil{back_stencil} {} - DepthStencil() = default; - - bool depth_test_enable; - bool depth_write_enable; - bool depth_bounds_enable; - bool stencil_enable; - Maxwell::ComparisonOp depth_test_function; - StencilFace front_stencil; - StencilFace back_stencil; - - std::size_t Hash() const noexcept; - - bool operator==(const DepthStencil& rhs) const noexcept; - - bool operator!=(const DepthStencil& rhs) const noexcept { - return !operator==(rhs); + template <std::size_t Position> + union StencilFace { + BitField<Position + 0, 3, u32> action_stencil_fail; + BitField<Position + 3, 3, u32> action_depth_fail; + BitField<Position + 6, 3, u32> action_depth_pass; + BitField<Position + 9, 3, u32> test_func; + + Maxwell::StencilOp ActionStencilFail() const noexcept { + return UnpackStencilOp(action_stencil_fail); + } + + Maxwell::StencilOp ActionDepthFail() const noexcept { + return UnpackStencilOp(action_depth_fail); + } + + Maxwell::StencilOp ActionDepthPass() const noexcept { + return UnpackStencilOp(action_depth_pass); + } + + Maxwell::ComparisonOp TestFunc() const noexcept { + return UnpackComparisonOp(test_func); + } + }; + + union { + u32 raw; + StencilFace<0> front; + StencilFace<12> back; + BitField<24, 1, u32> depth_test_enable; + BitField<25, 1, u32> depth_write_enable; + BitField<26, 1, u32> depth_bounds_enable; + BitField<27, 1, u32> stencil_enable; + BitField<28, 3, u32> depth_test_func; + }; + + void Fill(const Maxwell& regs) noexcept; + + Maxwell::ComparisonOp DepthTestFunc() const noexcept { + return UnpackComparisonOp(depth_test_func); } }; struct ColorBlending { - constexpr ColorBlending( - std::array<float, 4> blend_constants, std::size_t attachments_count, - std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments) - : attachments_count{attachments_count}, attachments{attachments} {} - ColorBlending() = default; - - std::size_t attachments_count; std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; - std::size_t Hash() const noexcept; - - bool operator==(const ColorBlending& rhs) const noexcept; - - bool operator!=(const ColorBlending& rhs) const noexcept { - return !operator==(rhs); - } + void Fill(const Maxwell& regs) noexcept; }; + VertexInput vertex_input; + Rasterizer rasterizer; + DepthStencil depth_stencil; + ColorBlending color_blending; + std::array<u8, 20> padding; + std::size_t Hash() const noexcept; bool operator==(const FixedPipelineState& rhs) const noexcept; @@ -248,25 +246,11 @@ struct FixedPipelineState { bool operator!=(const FixedPipelineState& rhs) const noexcept { return !operator==(rhs); } - - VertexInput vertex_input; - InputAssembly input_assembly; - Tessellation tessellation; - Rasterizer rasterizer; - DepthStencil depth_stencil; - ColorBlending color_blending; }; -static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexBinding>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexAttribute>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::InputAssembly>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::Tessellation>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::Rasterizer>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::DepthStencil>); -static_assert(std::is_trivially_copyable_v<FixedPipelineState::ColorBlending>); +static_assert(std::has_unique_object_representations_v<FixedPipelineState>); static_assert(std::is_trivially_copyable_v<FixedPipelineState>); +static_assert(std::is_trivially_constructible_v<FixedPipelineState>); +static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned"); FixedPipelineState GetFixedPipelineState(const Maxwell& regs); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 7480cb7c3..8681b821f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -2,13 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <iterator> + #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" namespace Vulkan::MaxwellToVK { @@ -17,88 +19,89 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; namespace Sampler { -vk::Filter Filter(Tegra::Texture::TextureFilter filter) { +VkFilter Filter(Tegra::Texture::TextureFilter filter) { switch (filter) { case Tegra::Texture::TextureFilter::Linear: - return vk::Filter::eLinear; + return VK_FILTER_LINEAR; case Tegra::Texture::TextureFilter::Nearest: - return vk::Filter::eNearest; + return VK_FILTER_NEAREST; } UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); return {}; } -vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { +VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { switch (mipmap_filter) { case Tegra::Texture::TextureMipmapFilter::None: // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to // use an image view with a single mipmap level to emulate this. - return vk::SamplerMipmapMode::eLinear; + return VK_SAMPLER_MIPMAP_MODE_LINEAR; + ; case Tegra::Texture::TextureMipmapFilter::Linear: - return vk::SamplerMipmapMode::eLinear; + return VK_SAMPLER_MIPMAP_MODE_LINEAR; case Tegra::Texture::TextureMipmapFilter::Nearest: - return vk::SamplerMipmapMode::eNearest; + return VK_SAMPLER_MIPMAP_MODE_NEAREST; } UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); return {}; } -vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, - Tegra::Texture::TextureFilter filter) { +VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, + Tegra::Texture::TextureFilter filter) { switch (wrap_mode) { case Tegra::Texture::WrapMode::Wrap: - return vk::SamplerAddressMode::eRepeat; + return VK_SAMPLER_ADDRESS_MODE_REPEAT; case Tegra::Texture::WrapMode::Mirror: - return vk::SamplerAddressMode::eMirroredRepeat; + return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; case Tegra::Texture::WrapMode::ClampToEdge: - return vk::SamplerAddressMode::eClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::Border: - return vk::SamplerAddressMode::eClampToBorder; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; case Tegra::Texture::WrapMode::Clamp: - if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this // by sending an invalid enumeration. - return static_cast<vk::SamplerAddressMode>(0xcafe); + return static_cast<VkSamplerAddressMode>(0xcafe); } // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors switch (filter) { case Tegra::Texture::TextureFilter::Nearest: - return vk::SamplerAddressMode::eClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Tegra::Texture::TextureFilter::Linear: - return vk::SamplerAddressMode::eClampToBorder; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; } UNREACHABLE(); - return vk::SamplerAddressMode::eClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: - return vk::SamplerAddressMode::eMirrorClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceBorder: UNIMPLEMENTED(); - return vk::SamplerAddressMode::eMirrorClampToEdge; + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; default: UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); return {}; } } -vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { +VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { switch (depth_compare_func) { case Tegra::Texture::DepthCompareFunc::Never: - return vk::CompareOp::eNever; + return VK_COMPARE_OP_NEVER; case Tegra::Texture::DepthCompareFunc::Less: - return vk::CompareOp::eLess; + return VK_COMPARE_OP_LESS; case Tegra::Texture::DepthCompareFunc::LessEqual: - return vk::CompareOp::eLessOrEqual; + return VK_COMPARE_OP_LESS_OR_EQUAL; case Tegra::Texture::DepthCompareFunc::Equal: - return vk::CompareOp::eEqual; + return VK_COMPARE_OP_EQUAL; case Tegra::Texture::DepthCompareFunc::NotEqual: - return vk::CompareOp::eNotEqual; + return VK_COMPARE_OP_NOT_EQUAL; case Tegra::Texture::DepthCompareFunc::Greater: - return vk::CompareOp::eGreater; + return VK_COMPARE_OP_GREATER; case Tegra::Texture::DepthCompareFunc::GreaterEqual: - return vk::CompareOp::eGreaterOrEqual; + return VK_COMPARE_OP_GREATER_OR_EQUAL; case Tegra::Texture::DepthCompareFunc::Always: - return vk::CompareOp::eAlways; + return VK_COMPARE_OP_ALWAYS; } UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", static_cast<u32>(depth_compare_func)); @@ -112,92 +115,92 @@ namespace { enum : u32 { Attachable = 1, Storage = 2 }; struct FormatTuple { - vk::Format format; ///< Vulkan format - int usage; ///< Describes image format usage + VkFormat format; ///< Vulkan format + int usage = 0; ///< Describes image format usage } constexpr tex_format_tuples[] = { - {vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage}, // ABGR8U - {vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage}, // ABGR8S - {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI - {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U - {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U - {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle) - {vk::Format::eR8Unorm, Attachable | Storage}, // R8U - {vk::Format::eR8Uint, Attachable | Storage}, // R8UI - {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F - {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U - {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S - {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI - {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F - {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI - {vk::Format::eBc1RgbaUnormBlock, {}}, // DXT1 - {vk::Format::eBc2UnormBlock, {}}, // DXT23 - {vk::Format::eBc3UnormBlock, {}}, // DXT45 - {vk::Format::eBc4UnormBlock, {}}, // DXN1 - {vk::Format::eBc5UnormBlock, {}}, // DXN2UNORM - {vk::Format::eBc5SnormBlock, {}}, // DXN2SNORM - {vk::Format::eBc7UnormBlock, {}}, // BC7U - {vk::Format::eBc6HUfloatBlock, {}}, // BC6H_UF16 - {vk::Format::eBc6HSfloatBlock, {}}, // BC6H_SF16 - {vk::Format::eAstc4x4UnormBlock, {}}, // ASTC_2D_4X4 - {vk::Format::eB8G8R8A8Unorm, {}}, // BGRA8 - {vk::Format::eR32G32B32A32Sfloat, Attachable | Storage}, // RGBA32F - {vk::Format::eR32G32Sfloat, Attachable | Storage}, // RG32F - {vk::Format::eR32Sfloat, Attachable | Storage}, // R32F - {vk::Format::eR16Sfloat, Attachable | Storage}, // R16F - {vk::Format::eR16Unorm, Attachable | Storage}, // R16U - {vk::Format::eUndefined, {}}, // R16S - {vk::Format::eUndefined, {}}, // R16UI - {vk::Format::eUndefined, {}}, // R16I - {vk::Format::eR16G16Unorm, Attachable | Storage}, // RG16 - {vk::Format::eR16G16Sfloat, Attachable | Storage}, // RG16F - {vk::Format::eUndefined, {}}, // RG16UI - {vk::Format::eUndefined, {}}, // RG16I - {vk::Format::eR16G16Snorm, Attachable | Storage}, // RG16S - {vk::Format::eUndefined, {}}, // RGB32F - {vk::Format::eR8G8B8A8Srgb, Attachable}, // RGBA8_SRGB - {vk::Format::eR8G8Unorm, Attachable | Storage}, // RG8U - {vk::Format::eR8G8Snorm, Attachable | Storage}, // RG8S - {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI - {vk::Format::eUndefined, {}}, // RGBX16F - {vk::Format::eR32Uint, Attachable | Storage}, // R32UI - {vk::Format::eR32Sint, Attachable | Storage}, // R32I - {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 - {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 - {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 - {vk::Format::eUndefined, {}}, // BGRA8_SRGB - {vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB - {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB - {vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB - {vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB - {vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U - {vk::Format::eAstc4x4SrgbBlock, {}}, // ASTC_2D_4X4_SRGB - {vk::Format::eAstc8x8SrgbBlock, {}}, // ASTC_2D_8X8_SRGB - {vk::Format::eAstc8x5SrgbBlock, {}}, // ASTC_2D_8X5_SRGB - {vk::Format::eAstc5x4SrgbBlock, {}}, // ASTC_2D_5X4_SRGB - {vk::Format::eAstc5x5UnormBlock, {}}, // ASTC_2D_5X5 - {vk::Format::eAstc5x5SrgbBlock, {}}, // ASTC_2D_5X5_SRGB - {vk::Format::eAstc10x8UnormBlock, {}}, // ASTC_2D_10X8 - {vk::Format::eAstc10x8SrgbBlock, {}}, // ASTC_2D_10X8_SRGB - {vk::Format::eAstc6x6UnormBlock, {}}, // ASTC_2D_6X6 - {vk::Format::eAstc6x6SrgbBlock, {}}, // ASTC_2D_6X6_SRGB - {vk::Format::eAstc10x10UnormBlock, {}}, // ASTC_2D_10X10 - {vk::Format::eAstc10x10SrgbBlock, {}}, // ASTC_2D_10X10_SRGB - {vk::Format::eAstc12x12UnormBlock, {}}, // ASTC_2D_12X12 - {vk::Format::eAstc12x12SrgbBlock, {}}, // ASTC_2D_12X12_SRGB - {vk::Format::eAstc8x6UnormBlock, {}}, // ASTC_2D_8X6 - {vk::Format::eAstc8x6SrgbBlock, {}}, // ASTC_2D_8X6_SRGB - {vk::Format::eAstc6x5UnormBlock, {}}, // ASTC_2D_6X5 - {vk::Format::eAstc6x5SrgbBlock, {}}, // ASTC_2D_6X5_SRGB - {vk::Format::eE5B9G9R9UfloatPack32, {}}, // E5B9G9R9F + {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U + {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S + {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI + {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U + {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle) + {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U + {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI + {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F + {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U + {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S + {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI + {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F + {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI + {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1 + {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23 + {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45 + {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1 + {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM + {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM + {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U + {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 + {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 + {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 + {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 + {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F + {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F + {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F + {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F + {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U + {VK_FORMAT_UNDEFINED}, // R16S + {VK_FORMAT_UNDEFINED}, // R16UI + {VK_FORMAT_UNDEFINED}, // R16I + {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 + {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F + {VK_FORMAT_UNDEFINED}, // RG16UI + {VK_FORMAT_UNDEFINED}, // RG16I + {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S + {VK_FORMAT_UNDEFINED}, // RGB32F + {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB + {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U + {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S + {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI + {VK_FORMAT_UNDEFINED}, // RGBX16F + {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI + {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I + {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 + {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 + {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 + {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB + {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB + {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB + {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB + {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB + {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U + {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB + {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB + {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB + {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB + {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5 + {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB + {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8 + {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB + {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6 + {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB + {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10 + {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB + {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12 + {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB + {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6 + {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB + {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5 + {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB + {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F // Depth formats - {vk::Format::eD32Sfloat, Attachable}, // Z32F - {vk::Format::eD16Unorm, Attachable}, // Z16 + {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F + {VK_FORMAT_D16_UNORM, Attachable}, // Z16 // DepthStencil formats - {vk::Format::eD24UnormS8Uint, Attachable}, // Z24S8 - {vk::Format::eD24UnormS8Uint, Attachable}, // S8Z24 (emulated) - {vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8 + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8 + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated) + {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8 }; static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); @@ -212,106 +215,106 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; - if (tuple.format == vk::Format::eUndefined) { + if (tuple.format == VK_FORMAT_UNDEFINED) { UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", static_cast<u32>(pixel_format)); - return {vk::Format::eA8B8G8R8UnormPack32, true, true}; + return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; } // Use ABGR8 on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) - ? vk::Format::eA8B8G8R8SrgbPack32 - : vk::Format::eA8B8G8R8UnormPack32; + ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 + : VK_FORMAT_A8B8G8R8_UNORM_PACK32; } const bool attachable = tuple.usage & Attachable; const bool storage = tuple.usage & Storage; - vk::FormatFeatureFlags usage; + VkFormatFeatureFlags usage; if (format_type == FormatType::Buffer) { - usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer | - vk::FormatFeatureFlagBits::eUniformTexelBuffer; + usage = + VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; } else { - usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst | - vk::FormatFeatureFlagBits::eTransferSrc; + usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; if (attachable) { - usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment - : vk::FormatFeatureFlagBits::eColorAttachment; + usage |= IsZetaFormat(pixel_format) ? VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT + : VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; } if (storage) { - usage |= vk::FormatFeatureFlagBits::eStorageImage; + usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; } } return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { switch (stage) { case Tegra::Engines::ShaderType::Vertex: - return vk::ShaderStageFlagBits::eVertex; + return VK_SHADER_STAGE_VERTEX_BIT; case Tegra::Engines::ShaderType::TesselationControl: - return vk::ShaderStageFlagBits::eTessellationControl; + return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; case Tegra::Engines::ShaderType::TesselationEval: - return vk::ShaderStageFlagBits::eTessellationEvaluation; + return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; case Tegra::Engines::ShaderType::Geometry: - return vk::ShaderStageFlagBits::eGeometry; + return VK_SHADER_STAGE_GEOMETRY_BIT; case Tegra::Engines::ShaderType::Fragment: - return vk::ShaderStageFlagBits::eFragment; + return VK_SHADER_STAGE_FRAGMENT_BIT; case Tegra::Engines::ShaderType::Compute: - return vk::ShaderStageFlagBits::eCompute; + return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); return {}; } -vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, - Maxwell::PrimitiveTopology topology) { +VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, + Maxwell::PrimitiveTopology topology) { switch (topology) { case Maxwell::PrimitiveTopology::Points: - return vk::PrimitiveTopology::ePointList; + return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; case Maxwell::PrimitiveTopology::Lines: - return vk::PrimitiveTopology::eLineList; + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; case Maxwell::PrimitiveTopology::LineStrip: - return vk::PrimitiveTopology::eLineStrip; + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: - return vk::PrimitiveTopology::eTriangleList; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; case Maxwell::PrimitiveTopology::TriangleStrip: - return vk::PrimitiveTopology::eTriangleStrip; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; case Maxwell::PrimitiveTopology::TriangleFan: - return vk::PrimitiveTopology::eTriangleFan; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; case Maxwell::PrimitiveTopology::Quads: // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases - return vk::PrimitiveTopology::eTriangleList; + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; case Maxwell::PrimitiveTopology::Patches: - return vk::PrimitiveTopology::ePatchList; + return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; default: UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); return {}; } } -vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { +VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { switch (type) { case Maxwell::VertexAttribute::Type::SignedNorm: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Snorm; + return VK_FORMAT_R8_SNORM; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Snorm; + return VK_FORMAT_R8G8_SNORM; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Snorm; + return VK_FORMAT_R8G8B8_SNORM; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Snorm; + return VK_FORMAT_R8G8B8A8_SNORM; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Snorm; + return VK_FORMAT_R16_SNORM; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Snorm; + return VK_FORMAT_R16G16_SNORM; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Snorm; + return VK_FORMAT_R16G16B16_SNORM; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Snorm; + return VK_FORMAT_R16G16B16A16_SNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: - return vk::Format::eA2B10G10R10SnormPack32; + return VK_FORMAT_A2B10G10R10_SNORM_PACK32; default: break; } @@ -319,23 +322,23 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr case Maxwell::VertexAttribute::Type::UnsignedNorm: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Unorm; + return VK_FORMAT_R8_UNORM; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Unorm; + return VK_FORMAT_R8G8_UNORM; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Unorm; + return VK_FORMAT_R8G8B8_UNORM; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Unorm; + return VK_FORMAT_R8G8B8A8_UNORM; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Unorm; + return VK_FORMAT_R16_UNORM; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Unorm; + return VK_FORMAT_R16G16_UNORM; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Unorm; + return VK_FORMAT_R16G16B16_UNORM; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Unorm; + return VK_FORMAT_R16G16B16A16_UNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: - return vk::Format::eA2B10G10R10UnormPack32; + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; default: break; } @@ -343,59 +346,69 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr case Maxwell::VertexAttribute::Type::SignedInt: switch (size) { case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Sint; + return VK_FORMAT_R16G16B16A16_SINT; case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Sint; + return VK_FORMAT_R8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Sint; + return VK_FORMAT_R8G8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Sint; + return VK_FORMAT_R8G8B8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Sint; + return VK_FORMAT_R8G8B8A8_SINT; case Maxwell::VertexAttribute::Size::Size_32: - return vk::Format::eR32Sint; + return VK_FORMAT_R32_SINT; default: break; } + break; case Maxwell::VertexAttribute::Type::UnsignedInt: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Uint; + return VK_FORMAT_R8_UINT; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Uint; + return VK_FORMAT_R8G8_UINT; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Uint; + return VK_FORMAT_R8G8B8_UINT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Uint; + return VK_FORMAT_R8G8B8A8_UINT; + case Maxwell::VertexAttribute::Size::Size_16: + return VK_FORMAT_R16_UINT; + case Maxwell::VertexAttribute::Size::Size_16_16: + return VK_FORMAT_R16G16_UINT; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return VK_FORMAT_R16G16B16_UINT; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return VK_FORMAT_R16G16B16A16_UINT; case Maxwell::VertexAttribute::Size::Size_32: - return vk::Format::eR32Uint; + return VK_FORMAT_R32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32: - return vk::Format::eR32G32Uint; + return VK_FORMAT_R32G32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32_32: - return vk::Format::eR32G32B32Uint; + return VK_FORMAT_R32G32B32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return vk::Format::eR32G32B32A32Uint; + return VK_FORMAT_R32G32B32A32_UINT; default: break; } + break; case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Uscaled; + return VK_FORMAT_R8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Uscaled; + return VK_FORMAT_R8G8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Uscaled; + return VK_FORMAT_R8G8B8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Uscaled; + return VK_FORMAT_R8G8B8A8_USCALED; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Uscaled; + return VK_FORMAT_R16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Uscaled; + return VK_FORMAT_R16G16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Uscaled; + return VK_FORMAT_R16G16B16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Uscaled; + return VK_FORMAT_R16G16B16A16_USCALED; default: break; } @@ -403,21 +416,21 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr case Maxwell::VertexAttribute::Type::SignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return vk::Format::eR8Sscaled; + return VK_FORMAT_R8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return vk::Format::eR8G8Sscaled; + return VK_FORMAT_R8G8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return vk::Format::eR8G8B8Sscaled; + return VK_FORMAT_R8G8B8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return vk::Format::eR8G8B8A8Sscaled; + return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Sscaled; + return VK_FORMAT_R16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Sscaled; + return VK_FORMAT_R16G16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Sscaled; + return VK_FORMAT_R16G16B16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Sscaled; + return VK_FORMAT_R16G16B16A16_SSCALED; default: break; } @@ -425,21 +438,21 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr case Maxwell::VertexAttribute::Type::Float: switch (size) { case Maxwell::VertexAttribute::Size::Size_32: - return vk::Format::eR32Sfloat; + return VK_FORMAT_R32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32: - return vk::Format::eR32G32Sfloat; + return VK_FORMAT_R32G32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32: - return vk::Format::eR32G32B32Sfloat; + return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return vk::Format::eR32G32B32A32Sfloat; + return VK_FORMAT_R32G32B32A32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16: - return vk::Format::eR16Sfloat; + return VK_FORMAT_R16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16: - return vk::Format::eR16G16Sfloat; + return VK_FORMAT_R16G16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return vk::Format::eR16G16B16Sfloat; + return VK_FORMAT_R16G16B16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return vk::Format::eR16G16B16A16Sfloat; + return VK_FORMAT_R16G16B16A16_SFLOAT; default: break; } @@ -450,210 +463,210 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr return {}; } -vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { +VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: case Maxwell::ComparisonOp::NeverOld: - return vk::CompareOp::eNever; + return VK_COMPARE_OP_NEVER; case Maxwell::ComparisonOp::Less: case Maxwell::ComparisonOp::LessOld: - return vk::CompareOp::eLess; + return VK_COMPARE_OP_LESS; case Maxwell::ComparisonOp::Equal: case Maxwell::ComparisonOp::EqualOld: - return vk::CompareOp::eEqual; + return VK_COMPARE_OP_EQUAL; case Maxwell::ComparisonOp::LessEqual: case Maxwell::ComparisonOp::LessEqualOld: - return vk::CompareOp::eLessOrEqual; + return VK_COMPARE_OP_LESS_OR_EQUAL; case Maxwell::ComparisonOp::Greater: case Maxwell::ComparisonOp::GreaterOld: - return vk::CompareOp::eGreater; + return VK_COMPARE_OP_GREATER; case Maxwell::ComparisonOp::NotEqual: case Maxwell::ComparisonOp::NotEqualOld: - return vk::CompareOp::eNotEqual; + return VK_COMPARE_OP_NOT_EQUAL; case Maxwell::ComparisonOp::GreaterEqual: case Maxwell::ComparisonOp::GreaterEqualOld: - return vk::CompareOp::eGreaterOrEqual; + return VK_COMPARE_OP_GREATER_OR_EQUAL; case Maxwell::ComparisonOp::Always: case Maxwell::ComparisonOp::AlwaysOld: - return vk::CompareOp::eAlways; + return VK_COMPARE_OP_ALWAYS; } UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); return {}; } -vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { +VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { switch (index_format) { case Maxwell::IndexFormat::UnsignedByte: if (!device.IsExtIndexTypeUint8Supported()) { UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); - return vk::IndexType::eUint16; + return VK_INDEX_TYPE_UINT16; } - return vk::IndexType::eUint8EXT; + return VK_INDEX_TYPE_UINT8_EXT; case Maxwell::IndexFormat::UnsignedShort: - return vk::IndexType::eUint16; + return VK_INDEX_TYPE_UINT16; case Maxwell::IndexFormat::UnsignedInt: - return vk::IndexType::eUint32; + return VK_INDEX_TYPE_UINT32; } UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); return {}; } -vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { +VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) { switch (stencil_op) { case Maxwell::StencilOp::Keep: case Maxwell::StencilOp::KeepOGL: - return vk::StencilOp::eKeep; + return VK_STENCIL_OP_KEEP; case Maxwell::StencilOp::Zero: case Maxwell::StencilOp::ZeroOGL: - return vk::StencilOp::eZero; + return VK_STENCIL_OP_ZERO; case Maxwell::StencilOp::Replace: case Maxwell::StencilOp::ReplaceOGL: - return vk::StencilOp::eReplace; + return VK_STENCIL_OP_REPLACE; case Maxwell::StencilOp::Incr: case Maxwell::StencilOp::IncrOGL: - return vk::StencilOp::eIncrementAndClamp; + return VK_STENCIL_OP_INCREMENT_AND_CLAMP; case Maxwell::StencilOp::Decr: case Maxwell::StencilOp::DecrOGL: - return vk::StencilOp::eDecrementAndClamp; + return VK_STENCIL_OP_DECREMENT_AND_CLAMP; case Maxwell::StencilOp::Invert: case Maxwell::StencilOp::InvertOGL: - return vk::StencilOp::eInvert; + return VK_STENCIL_OP_INVERT; case Maxwell::StencilOp::IncrWrap: case Maxwell::StencilOp::IncrWrapOGL: - return vk::StencilOp::eIncrementAndWrap; + return VK_STENCIL_OP_INCREMENT_AND_WRAP; case Maxwell::StencilOp::DecrWrap: case Maxwell::StencilOp::DecrWrapOGL: - return vk::StencilOp::eDecrementAndWrap; + return VK_STENCIL_OP_DECREMENT_AND_WRAP; } UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); return {}; } -vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { +VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) { switch (equation) { case Maxwell::Blend::Equation::Add: case Maxwell::Blend::Equation::AddGL: - return vk::BlendOp::eAdd; + return VK_BLEND_OP_ADD; case Maxwell::Blend::Equation::Subtract: case Maxwell::Blend::Equation::SubtractGL: - return vk::BlendOp::eSubtract; + return VK_BLEND_OP_SUBTRACT; case Maxwell::Blend::Equation::ReverseSubtract: case Maxwell::Blend::Equation::ReverseSubtractGL: - return vk::BlendOp::eReverseSubtract; + return VK_BLEND_OP_REVERSE_SUBTRACT; case Maxwell::Blend::Equation::Min: case Maxwell::Blend::Equation::MinGL: - return vk::BlendOp::eMin; + return VK_BLEND_OP_MIN; case Maxwell::Blend::Equation::Max: case Maxwell::Blend::Equation::MaxGL: - return vk::BlendOp::eMax; + return VK_BLEND_OP_MAX; } UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); return {}; } -vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { +VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) { switch (factor) { case Maxwell::Blend::Factor::Zero: case Maxwell::Blend::Factor::ZeroGL: - return vk::BlendFactor::eZero; + return VK_BLEND_FACTOR_ZERO; case Maxwell::Blend::Factor::One: case Maxwell::Blend::Factor::OneGL: - return vk::BlendFactor::eOne; + return VK_BLEND_FACTOR_ONE; case Maxwell::Blend::Factor::SourceColor: case Maxwell::Blend::Factor::SourceColorGL: - return vk::BlendFactor::eSrcColor; + return VK_BLEND_FACTOR_SRC_COLOR; case Maxwell::Blend::Factor::OneMinusSourceColor: case Maxwell::Blend::Factor::OneMinusSourceColorGL: - return vk::BlendFactor::eOneMinusSrcColor; + return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; case Maxwell::Blend::Factor::SourceAlpha: case Maxwell::Blend::Factor::SourceAlphaGL: - return vk::BlendFactor::eSrcAlpha; + return VK_BLEND_FACTOR_SRC_ALPHA; case Maxwell::Blend::Factor::OneMinusSourceAlpha: case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: - return vk::BlendFactor::eOneMinusSrcAlpha; + return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; case Maxwell::Blend::Factor::DestAlpha: case Maxwell::Blend::Factor::DestAlphaGL: - return vk::BlendFactor::eDstAlpha; + return VK_BLEND_FACTOR_DST_ALPHA; case Maxwell::Blend::Factor::OneMinusDestAlpha: case Maxwell::Blend::Factor::OneMinusDestAlphaGL: - return vk::BlendFactor::eOneMinusDstAlpha; + return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; case Maxwell::Blend::Factor::DestColor: case Maxwell::Blend::Factor::DestColorGL: - return vk::BlendFactor::eDstColor; + return VK_BLEND_FACTOR_DST_COLOR; case Maxwell::Blend::Factor::OneMinusDestColor: case Maxwell::Blend::Factor::OneMinusDestColorGL: - return vk::BlendFactor::eOneMinusDstColor; + return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; case Maxwell::Blend::Factor::SourceAlphaSaturate: case Maxwell::Blend::Factor::SourceAlphaSaturateGL: - return vk::BlendFactor::eSrcAlphaSaturate; + return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; case Maxwell::Blend::Factor::Source1Color: case Maxwell::Blend::Factor::Source1ColorGL: - return vk::BlendFactor::eSrc1Color; + return VK_BLEND_FACTOR_SRC1_COLOR; case Maxwell::Blend::Factor::OneMinusSource1Color: case Maxwell::Blend::Factor::OneMinusSource1ColorGL: - return vk::BlendFactor::eOneMinusSrc1Color; + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; case Maxwell::Blend::Factor::Source1Alpha: case Maxwell::Blend::Factor::Source1AlphaGL: - return vk::BlendFactor::eSrc1Alpha; + return VK_BLEND_FACTOR_SRC1_ALPHA; case Maxwell::Blend::Factor::OneMinusSource1Alpha: case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: - return vk::BlendFactor::eOneMinusSrc1Alpha; + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; case Maxwell::Blend::Factor::ConstantColor: case Maxwell::Blend::Factor::ConstantColorGL: - return vk::BlendFactor::eConstantColor; + return VK_BLEND_FACTOR_CONSTANT_COLOR; case Maxwell::Blend::Factor::OneMinusConstantColor: case Maxwell::Blend::Factor::OneMinusConstantColorGL: - return vk::BlendFactor::eOneMinusConstantColor; + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; case Maxwell::Blend::Factor::ConstantAlpha: case Maxwell::Blend::Factor::ConstantAlphaGL: - return vk::BlendFactor::eConstantAlpha; + return VK_BLEND_FACTOR_CONSTANT_ALPHA; case Maxwell::Blend::Factor::OneMinusConstantAlpha: case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: - return vk::BlendFactor::eOneMinusConstantAlpha; + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; } UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); return {}; } -vk::FrontFace FrontFace(Maxwell::FrontFace front_face) { +VkFrontFace FrontFace(Maxwell::FrontFace front_face) { switch (front_face) { case Maxwell::FrontFace::ClockWise: - return vk::FrontFace::eClockwise; + return VK_FRONT_FACE_CLOCKWISE; case Maxwell::FrontFace::CounterClockWise: - return vk::FrontFace::eCounterClockwise; + return VK_FRONT_FACE_COUNTER_CLOCKWISE; } UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); return {}; } -vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) { +VkCullModeFlags CullFace(Maxwell::CullFace cull_face) { switch (cull_face) { case Maxwell::CullFace::Front: - return vk::CullModeFlagBits::eFront; + return VK_CULL_MODE_FRONT_BIT; case Maxwell::CullFace::Back: - return vk::CullModeFlagBits::eBack; + return VK_CULL_MODE_BACK_BIT; case Maxwell::CullFace::FrontAndBack: - return vk::CullModeFlagBits::eFrontAndBack; + return VK_CULL_MODE_FRONT_AND_BACK; } UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); return {}; } -vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { +VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: - return vk::ComponentSwizzle::eZero; + return VK_COMPONENT_SWIZZLE_ZERO; case Tegra::Texture::SwizzleSource::R: - return vk::ComponentSwizzle::eR; + return VK_COMPONENT_SWIZZLE_R; case Tegra::Texture::SwizzleSource::G: - return vk::ComponentSwizzle::eG; + return VK_COMPONENT_SWIZZLE_G; case Tegra::Texture::SwizzleSource::B: - return vk::ComponentSwizzle::eB; + return VK_COMPONENT_SWIZZLE_B; case Tegra::Texture::SwizzleSource::A: - return vk::ComponentSwizzle::eA; + return VK_COMPONENT_SWIZZLE_A; case Tegra::Texture::SwizzleSource::OneInt: case Tegra::Texture::SwizzleSource::OneFloat: - return vk::ComponentSwizzle::eOne; + return VK_COMPONENT_SWIZZLE_ONE; } UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); return {}; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 24f6ab544..81bce4c6c 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -6,8 +6,8 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -18,46 +18,45 @@ using PixelFormat = VideoCore::Surface::PixelFormat; namespace Sampler { -vk::Filter Filter(Tegra::Texture::TextureFilter filter); +VkFilter Filter(Tegra::Texture::TextureFilter filter); -vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); +VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); -vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, - Tegra::Texture::TextureFilter filter); +VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, + Tegra::Texture::TextureFilter filter); -vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); +VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); } // namespace Sampler struct FormatInfo { - vk::Format format; + VkFormat format; bool attachable; bool storage; }; FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); -vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); -vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device, - Maxwell::PrimitiveTopology topology); +VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology); -vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); +VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); -vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison); +VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); -vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); +VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); -vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op); +VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); -vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation); +VkBlendOp BlendEquation(Maxwell::Blend::Equation equation); -vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); +VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor); -vk::FrontFace FrontFace(Maxwell::FrontFace front_face); +VkFrontFace FrontFace(Maxwell::FrontFace front_face); -vk::CullModeFlags CullFace(Maxwell::CullFace cull_face); +VkCullModeFlags CullFace(Maxwell::CullFace cull_face); -vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); +VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp new file mode 100644 index 000000000..435c8c1b8 --- /dev/null +++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp @@ -0,0 +1,220 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#ifdef HAS_NSIGHT_AFTERMATH + +#include <mutex> +#include <string> +#include <string_view> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#define VK_NO_PROTOTYPES +#include <vulkan/vulkan.h> + +#include <GFSDK_Aftermath.h> +#include <GFSDK_Aftermath_Defines.h> +#include <GFSDK_Aftermath_GpuCrashDump.h> +#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h> + +#include "common/common_paths.h" +#include "common/common_types.h" +#include "common/file_util.h" +#include "common/logging/log.h" +#include "common/scope_exit.h" + +#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" + +namespace Vulkan { + +static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll"; + +NsightAftermathTracker::NsightAftermathTracker() = default; + +NsightAftermathTracker::~NsightAftermathTracker() { + if (initialized) { + (void)GFSDK_Aftermath_DisableGpuCrashDumps(); + } +} + +bool NsightAftermathTracker::Initialize() { + if (!dl.Open(AFTERMATH_LIB_NAME)) { + LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL"); + return false; + } + + if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps", + &GFSDK_Aftermath_DisableGpuCrashDumps) || + !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps", + &GFSDK_Aftermath_EnableGpuCrashDumps) || + !dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier", + &GFSDK_Aftermath_GetShaderDebugInfoIdentifier) || + !dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) || + !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder", + &GFSDK_Aftermath_GpuCrashDump_CreateDecoder) || + !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder", + &GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) || + !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON", + &GFSDK_Aftermath_GpuCrashDump_GenerateJSON) || + !dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON", + &GFSDK_Aftermath_GpuCrashDump_GetJSON)) { + LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); + return false; + } + + dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash"; + + (void)FileUtil::DeleteDirRecursively(dump_dir); + if (!FileUtil::CreateDir(dump_dir)) { + LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); + return false; + } + + if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( + GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, + GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, + ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { + LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); + return false; + } + + LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir); + + initialized = true; + return true; +} + +void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { + if (!initialized) { + return; + } + + std::vector<u32> spirv_copy = spirv; + GFSDK_Aftermath_SpirvCode shader; + shader.pData = spirv_copy.data(); + shader.size = static_cast<u32>(spirv_copy.size() * 4); + + std::scoped_lock lock{mutex}; + + GFSDK_Aftermath_ShaderHash hash; + if (!GFSDK_Aftermath_SUCCEED( + GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) { + LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module"); + return; + } + + FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); + if (!file.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); + return; + } + if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) { + LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); + return; + } +} + +void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, + u32 gpu_crash_dump_size) { + std::scoped_lock lock{mutex}; + + LOG_CRITICAL(Render_Vulkan, "called"); + + GFSDK_Aftermath_GpuCrashDump_Decoder decoder; + if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder( + GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) { + LOG_ERROR(Render_Vulkan, "Failed to create decoder"); + return; + } + SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); }); + + u32 json_size = 0; + if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON( + decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO, + GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr, + this, &json_size))) { + LOG_ERROR(Render_Vulkan, "Failed to generate JSON"); + return; + } + std::vector<char> json(json_size); + if (!GFSDK_Aftermath_SUCCEED( + GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) { + LOG_ERROR(Render_Vulkan, "Failed to query JSON"); + return; + } + + const std::string base_name = [this] { + const int id = dump_id++; + if (id == 0) { + return fmt::format("{}/crash.nv-gpudmp", dump_dir); + } else { + return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id); + } + }(); + + std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size); + if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { + LOG_ERROR(Render_Vulkan, "Failed to write dump file"); + return; + } + const std::string_view json_view(json.data(), json.size()); + if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { + LOG_ERROR(Render_Vulkan, "Failed to write JSON"); + return; + } +} + +void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info, + u32 shader_debug_info_size) { + std::scoped_lock lock{mutex}; + + GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier; + if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier( + GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) { + LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed"); + return; + } + + const std::string path = + fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]); + FileUtil::IOFile file(path, "wb"); + if (!file.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Failed to create file {}", path); + return; + } + if (file.WriteBytes(static_cast<const u8*>(shader_debug_info), shader_debug_info_size) != + shader_debug_info_size) { + LOG_ERROR(Render_Vulkan, "Failed to write file {}", path); + return; + } +} + +void NsightAftermathTracker::OnCrashDumpDescriptionCallback( + PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) { + add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu"); +} + +void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump, + u32 gpu_crash_dump_size, void* user_data) { + static_cast<NsightAftermathTracker*>(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump, + gpu_crash_dump_size); +} + +void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info, + u32 shader_debug_info_size, void* user_data) { + static_cast<NsightAftermathTracker*>(user_data)->OnShaderDebugInfoCallback( + shader_debug_info, shader_debug_info_size); +} + +void NsightAftermathTracker::CrashDumpDescriptionCallback( + PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) { + static_cast<NsightAftermathTracker*>(user_data)->OnCrashDumpDescriptionCallback( + add_description); +} + +} // namespace Vulkan + +#endif // HAS_NSIGHT_AFTERMATH diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h new file mode 100644 index 000000000..afe7ae99e --- /dev/null +++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h @@ -0,0 +1,87 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <mutex> +#include <string> +#include <vector> + +#define VK_NO_PROTOTYPES +#include <vulkan/vulkan.h> + +#ifdef HAS_NSIGHT_AFTERMATH +#include <GFSDK_Aftermath_Defines.h> +#include <GFSDK_Aftermath_GpuCrashDump.h> +#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h> +#endif + +#include "common/common_types.h" +#include "common/dynamic_library.h" + +namespace Vulkan { + +class NsightAftermathTracker { +public: + NsightAftermathTracker(); + ~NsightAftermathTracker(); + + NsightAftermathTracker(const NsightAftermathTracker&) = delete; + NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete; + + // Delete move semantics because Aftermath initialization uses a pointer to this. + NsightAftermathTracker(NsightAftermathTracker&&) = delete; + NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; + + bool Initialize(); + + void SaveShader(const std::vector<u32>& spirv) const; + +private: +#ifdef HAS_NSIGHT_AFTERMATH + static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size, + void* user_data); + + static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size, + void* user_data); + + static void CrashDumpDescriptionCallback( + PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data); + + void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size); + + void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size); + + void OnCrashDumpDescriptionCallback( + PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description); + + mutable std::mutex mutex; + + std::string dump_dir; + int dump_id = 0; + + bool initialized = false; + + Common::DynamicLibrary dl; + PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; + PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; + PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; + PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; + PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; + PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; + PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; + PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; +#endif +}; + +#ifndef HAS_NSIGHT_AFTERMATH +inline NsightAftermathTracker::NsightAftermathTracker() = default; +inline NsightAftermathTracker::~NsightAftermathTracker() = default; +inline bool NsightAftermathTracker::Initialize() { + return false; +} +inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} +#endif + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 9cdb4b627..04532f8f8 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -24,7 +24,6 @@ #include "core/settings.h" #include "core/telemetry_session.h" #include "video_core/gpu.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" @@ -34,15 +33,16 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/wrapper.h" -// Include these late to avoid changing Vulkan-Hpp's dynamic dispatcher size +// Include these late to avoid polluting previous headers #ifdef _WIN32 #include <windows.h> // ensure include order #include <vulkan/vulkan_win32.h> #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) #include <X11/Xlib.h> #include <vulkan/vulkan_wayland.h> #include <vulkan/vulkan_xlib.h> @@ -54,20 +54,19 @@ namespace { using Core::Frontend::WindowSystemType; -VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, +VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* data, [[maybe_unused]] void* user_data) { - const auto severity{static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(severity_)}; const char* message{data->pMessage}; - if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) { + if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { LOG_CRITICAL(Render_Vulkan, "{}", message); - } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) { + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { LOG_WARNING(Render_Vulkan, "{}", message); - } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) { + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { LOG_INFO(Render_Vulkan, "{}", message); - } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) { + } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { LOG_DEBUG(Render_Vulkan, "{}", message); } return VK_FALSE; @@ -94,22 +93,24 @@ Common::DynamicLibrary OpenVulkanLibrary() { return library; } -UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoaderDynamic& dld, - WindowSystemType window_type = WindowSystemType::Headless, - bool enable_layers = false) { +vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatch& dld, + WindowSystemType window_type = WindowSystemType::Headless, + bool enable_layers = false) { if (!library.IsOpen()) { LOG_ERROR(Render_Vulkan, "Vulkan library not available"); - return UniqueInstance{}; + return {}; } - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; - if (!library.GetSymbol("vkGetInstanceProcAddr", &vkGetInstanceProcAddr)) { + if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); - return UniqueInstance{}; + return {}; + } + if (!vk::Load(dld)) { + LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); + return {}; } - dld.init(vkGetInstanceProcAddr); std::vector<const char*> extensions; - extensions.reserve(4); + extensions.reserve(6); switch (window_type) { case Core::Frontend::WindowSystemType::Headless: break; @@ -118,7 +119,7 @@ UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoade extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); break; #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) case Core::Frontend::WindowSystemType::X11: extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); break; @@ -136,45 +137,39 @@ UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoade if (enable_layers) { extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } + extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); - u32 num_properties; - if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, nullptr, dld) != - vk::Result::eSuccess) { - LOG_ERROR(Render_Vulkan, "Failed to query number of extension properties"); - return UniqueInstance{}; - } - std::vector<vk::ExtensionProperties> properties(num_properties); - if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, properties.data(), - dld) != vk::Result::eSuccess) { + const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); + if (!properties) { LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); - return UniqueInstance{}; + return {}; } for (const char* extension : extensions) { const auto it = - std::find_if(properties.begin(), properties.end(), [extension](const auto& prop) { + std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) { return !std::strcmp(extension, prop.extensionName); }); - if (it == properties.end()) { + if (it == properties->end()) { LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); - return UniqueInstance{}; + return {}; } } - const vk::ApplicationInfo application_info("yuzu Emulator", VK_MAKE_VERSION(0, 1, 0), - "yuzu Emulator", VK_MAKE_VERSION(0, 1, 0), - VK_API_VERSION_1_1); - const std::array layers = {"VK_LAYER_LUNARG_standard_validation"}; - const vk::InstanceCreateInfo instance_ci( - {}, &application_info, enable_layers ? static_cast<u32>(layers.size()) : 0, layers.data(), - static_cast<u32>(extensions.size()), extensions.data()); - vk::Instance unsafe_instance; - if (vk::createInstance(&instance_ci, nullptr, &unsafe_instance, dld) != vk::Result::eSuccess) { + static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; + vk::Span<const char*> layers = layers_data; + if (!enable_layers) { + layers = {}; + } + vk::Instance instance = vk::Instance::Create(layers, extensions, dld); + if (!instance) { LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); - return UniqueInstance{}; + return {}; + } + if (!vk::Load(*instance, dld)) { + LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); } - dld.init(unsafe_instance); - return UniqueInstance(unsafe_instance, {nullptr, dld}); + return instance; } std::string GetReadableVersion(u32 version) { @@ -187,14 +182,14 @@ std::string GetDriverVersion(const VKDevice& device) { // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 const u32 version = device.GetDriverVersion(); - if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { const u32 major = (version >> 22) & 0x3ff; const u32 minor = (version >> 14) & 0x0ff; const u32 secondary = (version >> 6) & 0x0ff; const u32 tertiary = version & 0x003f; return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary); } - if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) { + if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { const u32 major = version >> 14; const u32 minor = version & 0x3fff; return fmt::format("{}.{}", major, minor); @@ -307,10 +302,8 @@ void RendererVulkan::ShutDown() { if (!device) { return; } - const auto dev = device->GetLogical(); - const auto& dld = device->GetDispatchLoader(); - if (dev && dld.vkDeviceWaitIdle) { - dev.waitIdle(dld); + if (const auto& dev = device->GetLogical()) { + dev.WaitIdle(); } rasterizer.reset(); @@ -326,23 +319,11 @@ bool RendererVulkan::CreateDebugCallback() { if (!Settings::values.renderer_debug) { return true; } - const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( - {}, - vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | - vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | - vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | - vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose, - vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | - vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | - vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, - &DebugCallback, nullptr); - vk::DebugUtilsMessengerEXT unsafe_callback; - if (instance->createDebugUtilsMessengerEXT(&callback_ci, nullptr, &unsafe_callback, dld) != - vk::Result::eSuccess) { + debug_callback = instance.TryCreateDebugCallback(DebugCallback); + if (!debug_callback) { LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); return false; } - debug_callback = UniqueDebugUtilsMessengerEXT(unsafe_callback, {*instance, nullptr, dld}); return true; } @@ -357,14 +338,14 @@ bool RendererVulkan::CreateSurface() { nullptr, 0, nullptr, hWnd}; const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); - if (!vkCreateWin32SurfaceKHR || vkCreateWin32SurfaceKHR(instance.get(), &win32_ci, nullptr, - &unsafe_surface) != VK_SUCCESS) { + if (!vkCreateWin32SurfaceKHR || + vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); return false; } } #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) if (window_info.type == Core::Frontend::WindowSystemType::X11) { const VkXlibSurfaceCreateInfoKHR xlib_ci{ VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, @@ -372,8 +353,8 @@ bool RendererVulkan::CreateSurface() { reinterpret_cast<Window>(window_info.render_surface)}; const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); - if (!vkCreateXlibSurfaceKHR || vkCreateXlibSurfaceKHR(instance.get(), &xlib_ci, nullptr, - &unsafe_surface) != VK_SUCCESS) { + if (!vkCreateXlibSurfaceKHR || + vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); return false; } @@ -386,7 +367,7 @@ bool RendererVulkan::CreateSurface() { const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); if (!vkCreateWaylandSurfaceKHR || - vkCreateWaylandSurfaceKHR(instance.get(), &wayland_ci, nullptr, &unsafe_surface) != + vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); return false; @@ -398,26 +379,30 @@ bool RendererVulkan::CreateSurface() { return false; } - surface = UniqueSurfaceKHR(unsafe_surface, {*instance, nullptr, dld}); + surface = vk::SurfaceKHR(unsafe_surface, *instance, dld); return true; } bool RendererVulkan::PickDevices() { - const auto devices = instance->enumeratePhysicalDevices(dld); + const auto devices = instance.EnumeratePhysicalDevices(); + if (!devices) { + LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices"); + return false; + } const s32 device_index = Settings::values.vulkan_device; - if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { + if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) { LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); return false; } - const vk::PhysicalDevice physical_device = devices[static_cast<std::size_t>(device_index)]; - - if (!VKDevice::IsSuitable(physical_device, *surface, dld)) { + const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)], + dld); + if (!VKDevice::IsSuitable(physical_device, *surface)) { return false; } - device = std::make_unique<VKDevice>(dld, physical_device, *surface); - return device->Create(*instance); + device = std::make_unique<VKDevice>(*instance, physical_device, *surface, dld); + return device->Create(); } void RendererVulkan::Report() const { @@ -444,30 +429,22 @@ void RendererVulkan::Report() const { } std::vector<std::string> RendererVulkan::EnumerateDevices() { - // Avoid putting DispatchLoaderDynamic, it's too large - auto dld_memory = std::make_unique<vk::DispatchLoaderDynamic>(); - auto& dld = *dld_memory; - + vk::InstanceDispatch dld; Common::DynamicLibrary library = OpenVulkanLibrary(); - UniqueInstance instance = CreateInstance(library, dld); + vk::Instance instance = CreateInstance(library, dld); if (!instance) { return {}; } - u32 num_devices; - if (instance->enumeratePhysicalDevices(&num_devices, nullptr, dld) != vk::Result::eSuccess) { - return {}; - } - std::vector<vk::PhysicalDevice> devices(num_devices); - if (instance->enumeratePhysicalDevices(&num_devices, devices.data(), dld) != - vk::Result::eSuccess) { + const std::optional physical_devices = instance.EnumeratePhysicalDevices(); + if (!physical_devices) { return {}; } std::vector<std::string> names; - names.reserve(num_devices); - for (auto& device : devices) { - names.push_back(device.getProperties(dld).deviceName); + names.reserve(physical_devices->size()); + for (const auto& device : *physical_devices) { + names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); } return names; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 42e253de5..18270909b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -12,7 +12,7 @@ #include "common/dynamic_library.h" #include "video_core/renderer_base.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; @@ -61,14 +61,14 @@ private: Core::System& system; Common::DynamicLibrary library; - vk::DispatchLoaderDynamic dld; + vk::InstanceDispatch dld; - UniqueInstance instance; - UniqueSurfaceKHR surface; + vk::Instance instance; + vk::SurfaceKHR surface; VKScreenInfo screen_info; - UniqueDebugUtilsMessengerEXT debug_callback; + vk::DebugCallback debug_callback; std::unique_ptr<VKDevice> device; std::unique_ptr<VKSwapchain> swapchain; std::unique_ptr<VKMemoryManager> memory_manager; diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp new file mode 100644 index 000000000..5a472ba9b --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +/* + * Build instructions: + * $ glslangValidator -V quad_indexed.comp -o output.spv + * $ spirv-opt -O --strip-debug output.spv -o optimized.spv + * $ xxd -i optimized.spv + * + * Then copy that bytecode to the C++ file + */ + +#version 460 core + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { + uint input_indexes[]; +}; + +layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { + uint output_indexes[]; +}; + +layout (push_constant) uniform PushConstants { + uint base_vertex; + int index_shift; // 0: uint8, 1: uint16, 2: uint32 +}; + +void main() { + int primitive = int(gl_GlobalInvocationID.x); + if (primitive * 6 >= output_indexes.length()) { + return; + } + + int index_size = 8 << index_shift; + int flipped_shift = 2 - index_shift; + int mask = (1 << flipped_shift) - 1; + + const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); + for (uint vertex = 0; vertex < 6; ++vertex) { + int offset = primitive * 4 + quad_swizzle[vertex]; + int int_offset = offset >> flipped_shift; + int bit_offset = (offset & mask) * index_size; + uint packed_input = input_indexes[int_offset]; + uint index = bitfieldExtract(packed_input, bit_offset, index_size); + output_indexes[primitive * 6 + vertex] = index + base_vertex; + } +} diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 855cfc883..fbd406f2b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -20,7 +20,6 @@ #include "video_core/gpu.h" #include "video_core/morton.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" @@ -30,6 +29,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" namespace Vulkan { @@ -140,16 +140,25 @@ struct ScreenRectVertex { std::array<f32, 2> position; std::array<f32, 2> tex_coord; - static vk::VertexInputBindingDescription GetDescription() { - return vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertex), - vk::VertexInputRate::eVertex); + static VkVertexInputBindingDescription GetDescription() { + VkVertexInputBindingDescription description; + description.binding = 0; + description.stride = sizeof(ScreenRectVertex); + description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + return description; } - static std::array<vk::VertexInputAttributeDescription, 2> GetAttributes() { - return {vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat, - offsetof(ScreenRectVertex, position)), - vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32Sfloat, - offsetof(ScreenRectVertex, tex_coord))}; + static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() { + std::array<VkVertexInputAttributeDescription, 2> attributes; + attributes[0].location = 0; + attributes[0].binding = 0; + attributes[0].format = VK_FORMAT_R32G32_SFLOAT; + attributes[0].offset = offsetof(ScreenRectVertex, position); + attributes[1].location = 1; + attributes[1].binding = 0; + attributes[1].format = VK_FORMAT_R32G32_SFLOAT; + attributes[1].offset = offsetof(ScreenRectVertex, tex_coord); + return attributes; } }; @@ -172,16 +181,16 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer); } -vk::Format GetFormat(const Tegra::FramebufferConfig& framebuffer) { +VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { switch (framebuffer.pixel_format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return vk::Format::eA8B8G8R8UnormPack32; + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; case Tegra::FramebufferConfig::PixelFormat::RGB565: - return vk::Format::eR5G6B5UnormPack16; + return VK_FORMAT_R5G6B5_UNORM_PACK16; default: UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", static_cast<u32>(framebuffer.pixel_format)); - return vk::Format::eA8B8G8R8UnormPack32; + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; } } @@ -219,8 +228,8 @@ void VKBlitScreen::Recreate() { CreateDynamicResources(); } -std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { +std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated) { RefreshResources(framebuffer); // Finish any pending renderpass @@ -255,46 +264,76 @@ std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferC framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, map.GetAddress() + image_offset, host_ptr); - blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite, - vk::ImageLayout::eTransferDstOptimal); - - const vk::BufferImageCopy copy(image_offset, 0, 0, - {vk::ImageAspectFlagBits::eColor, 0, 0, 1}, {0, 0, 0}, - {framebuffer.width, framebuffer.height, 1}); - scheduler.Record([buffer_handle = *buffer, image = blit_image->GetHandle(), - copy](auto cmdbuf, auto& dld) { - cmdbuf.copyBufferToImage(buffer_handle, image, vk::ImageLayout::eTransferDstOptimal, - {copy}, dld); - }); + blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkBufferImageCopy copy; + copy.bufferOffset = image_offset; + copy.bufferRowLength = 0; + copy.bufferImageHeight = 0; + copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy.imageSubresource.mipLevel = 0; + copy.imageSubresource.baseArrayLayer = 0; + copy.imageSubresource.layerCount = 1; + copy.imageOffset.x = 0; + copy.imageOffset.y = 0; + copy.imageOffset.z = 0; + copy.imageExtent.width = framebuffer.width; + copy.imageExtent.height = framebuffer.height; + copy.imageExtent.depth = 1; + scheduler.Record( + [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); + }); } map.Release(); - blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eFragmentShader, - vk::AccessFlagBits::eShaderRead, - vk::ImageLayout::eShaderReadOnlyOptimal); + blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], descriptor_set = descriptor_sets[image_index], buffer = *buffer, size = swapchain.GetSize(), pipeline = *pipeline, - layout = *pipeline_layout](auto cmdbuf, auto& dld) { - const vk::ClearValue clear_color{std::array{0.0f, 0.0f, 0.0f, 1.0f}}; - const vk::RenderPassBeginInfo renderpass_bi(renderpass, framebuffer, {{0, 0}, size}, 1, - &clear_color); - - cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); - cmdbuf.setViewport( - 0, - {{0.0f, 0.0f, static_cast<f32>(size.width), static_cast<f32>(size.height), 0.0f, 1.0f}}, - dld); - cmdbuf.setScissor(0, {{{0, 0}, size}}, dld); - - cmdbuf.bindVertexBuffers(0, {buffer}, {offsetof(BufferData, vertices)}, dld); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, {descriptor_set}, {}, - dld); - cmdbuf.draw(4, 1, 0, 0, dld); - cmdbuf.endRenderPass(dld); + layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + VkClearValue clear_color; + clear_color.color.float32[0] = 0.0f; + clear_color.color.float32[1] = 0.0f; + clear_color.color.float32[2] = 0.0f; + clear_color.color.float32[3] = 0.0f; + + VkRenderPassBeginInfo renderpass_bi; + renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + renderpass_bi.pNext = nullptr; + renderpass_bi.renderPass = renderpass; + renderpass_bi.framebuffer = framebuffer; + renderpass_bi.renderArea.offset.x = 0; + renderpass_bi.renderArea.offset.y = 0; + renderpass_bi.renderArea.extent = size; + renderpass_bi.clearValueCount = 1; + renderpass_bi.pClearValues = &clear_color; + + VkViewport viewport; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = static_cast<float>(size.width); + viewport.height = static_cast<float>(size.height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + + VkRect2D scissor; + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent = size; + + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + + cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.Draw(4, 1, 0, 0); + cmdbuf.EndRenderPass(); }); return {scheduler.GetFence(), *semaphores[image_index]}; @@ -334,165 +373,297 @@ void VKBlitScreen::CreateShaders() { } void VKBlitScreen::CreateSemaphores() { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - semaphores.resize(image_count); - for (std::size_t i = 0; i < image_count; ++i) { - semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); - } + std::generate(semaphores.begin(), semaphores.end(), + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKBlitScreen::CreateDescriptorPool() { - const std::array<vk::DescriptorPoolSize, 2> pool_sizes{ - vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, static_cast<u32>(image_count)}, - vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, - static_cast<u32>(image_count)}}; - const vk::DescriptorPoolCreateInfo pool_ci( - {}, static_cast<u32>(image_count), static_cast<u32>(pool_sizes.size()), pool_sizes.data()); - const auto dev = device.GetLogical(); - descriptor_pool = dev.createDescriptorPoolUnique(pool_ci, nullptr, device.GetDispatchLoader()); + std::array<VkDescriptorPoolSize, 2> pool_sizes; + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + pool_sizes[0].descriptorCount = static_cast<u32>(image_count); + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + pool_sizes[1].descriptorCount = static_cast<u32>(image_count); + + VkDescriptorPoolCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + ci.maxSets = static_cast<u32>(image_count); + ci.poolSizeCount = static_cast<u32>(pool_sizes.size()); + ci.pPoolSizes = pool_sizes.data(); + descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); } void VKBlitScreen::CreateRenderPass() { - const vk::AttachmentDescription color_attachment( - {}, swapchain.GetImageFormat(), vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eClear, - vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, - vk::AttachmentStoreOp::eDontCare, vk::ImageLayout::eUndefined, - vk::ImageLayout::ePresentSrcKHR); - - const vk::AttachmentReference color_attachment_ref(0, vk::ImageLayout::eColorAttachmentOptimal); - - const vk::SubpassDescription subpass_description({}, vk::PipelineBindPoint::eGraphics, 0, - nullptr, 1, &color_attachment_ref, nullptr, - nullptr, 0, nullptr); - - const vk::SubpassDependency dependency( - VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eColorAttachmentOutput, {}, - vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite, {}); - - const vk::RenderPassCreateInfo renderpass_ci({}, 1, &color_attachment, 1, &subpass_description, - 1, &dependency); - - const auto dev = device.GetLogical(); - renderpass = dev.createRenderPassUnique(renderpass_ci, nullptr, device.GetDispatchLoader()); + VkAttachmentDescription color_attachment; + color_attachment.flags = 0; + color_attachment.format = swapchain.GetImageFormat(); + color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + VkAttachmentReference color_attachment_ref; + color_attachment_ref.attachment = 0; + color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass_description; + subpass_description.flags = 0; + subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass_description.inputAttachmentCount = 0; + subpass_description.pInputAttachments = nullptr; + subpass_description.colorAttachmentCount = 1; + subpass_description.pColorAttachments = &color_attachment_ref; + subpass_description.pResolveAttachments = nullptr; + subpass_description.pDepthStencilAttachment = nullptr; + subpass_description.preserveAttachmentCount = 0; + subpass_description.pPreserveAttachments = nullptr; + + VkSubpassDependency dependency; + dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + dependency.dstSubpass = 0; + dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.srcAccessMask = 0; + dependency.dstAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dependency.dependencyFlags = 0; + + VkRenderPassCreateInfo renderpass_ci; + renderpass_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + renderpass_ci.pNext = nullptr; + renderpass_ci.flags = 0; + renderpass_ci.attachmentCount = 1; + renderpass_ci.pAttachments = &color_attachment; + renderpass_ci.subpassCount = 1; + renderpass_ci.pSubpasses = &subpass_description; + renderpass_ci.dependencyCount = 1; + renderpass_ci.pDependencies = &dependency; + + renderpass = device.GetLogical().CreateRenderPass(renderpass_ci); } void VKBlitScreen::CreateDescriptorSetLayout() { - const std::array<vk::DescriptorSetLayoutBinding, 2> layout_bindings{ - vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eUniformBuffer, 1, - vk::ShaderStageFlagBits::eVertex, nullptr), - vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eCombinedImageSampler, 1, - vk::ShaderStageFlagBits::eFragment, nullptr)}; - const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci( - {}, static_cast<u32>(layout_bindings.size()), layout_bindings.data()); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld); + std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings; + layout_bindings[0].binding = 0; + layout_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + layout_bindings[0].descriptorCount = 1; + layout_bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + layout_bindings[0].pImmutableSamplers = nullptr; + layout_bindings[1].binding = 1; + layout_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + layout_bindings[1].descriptorCount = 1; + layout_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + layout_bindings[1].pImmutableSamplers = nullptr; + + VkDescriptorSetLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.bindingCount = static_cast<u32>(layout_bindings.size()); + ci.pBindings = layout_bindings.data(); + + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); } void VKBlitScreen::CreateDescriptorSets() { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - - descriptor_sets.resize(image_count); - for (std::size_t i = 0; i < image_count; ++i) { - const vk::DescriptorSetLayout layout = *descriptor_set_layout; - const vk::DescriptorSetAllocateInfo descriptor_set_ai(*descriptor_pool, 1, &layout); - const vk::Result result = - dev.allocateDescriptorSets(&descriptor_set_ai, &descriptor_sets[i], dld); - ASSERT(result == vk::Result::eSuccess); - } + const std::vector layouts(image_count, *descriptor_set_layout); + + VkDescriptorSetAllocateInfo ai; + ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + ai.pNext = nullptr; + ai.descriptorPool = *descriptor_pool; + ai.descriptorSetCount = static_cast<u32>(image_count); + ai.pSetLayouts = layouts.data(); + descriptor_sets = descriptor_pool.Allocate(ai); } void VKBlitScreen::CreatePipelineLayout() { - const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &descriptor_set_layout.get(), 0, - nullptr); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - pipeline_layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); + VkPipelineLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.setLayoutCount = 1; + ci.pSetLayouts = descriptor_set_layout.address(); + ci.pushConstantRangeCount = 0; + ci.pPushConstantRanges = nullptr; + pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); } void VKBlitScreen::CreateGraphicsPipeline() { - const std::array shader_stages = { - vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eVertex, *vertex_shader, - "main", nullptr), - vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eFragment, *fragment_shader, - "main", nullptr)}; + std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages; + shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[0].pNext = nullptr; + shader_stages[0].flags = 0; + shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shader_stages[0].module = *vertex_shader; + shader_stages[0].pName = "main"; + shader_stages[0].pSpecializationInfo = nullptr; + shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[1].pNext = nullptr; + shader_stages[1].flags = 0; + shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stages[1].module = *fragment_shader; + shader_stages[1].pName = "main"; + shader_stages[1].pSpecializationInfo = nullptr; const auto vertex_binding_description = ScreenRectVertex::GetDescription(); const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - const vk::PipelineVertexInputStateCreateInfo vertex_input( - {}, 1, &vertex_binding_description, static_cast<u32>(vertex_attrs_description.size()), - vertex_attrs_description.data()); - - const vk::PipelineInputAssemblyStateCreateInfo input_assembly( - {}, vk::PrimitiveTopology::eTriangleStrip, false); - - // Set a dummy viewport, it's going to be replaced by dynamic states. - const vk::Viewport viewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f); - const vk::Rect2D scissor({0, 0}, {1, 1}); - const vk::PipelineViewportStateCreateInfo viewport_state({}, 1, &viewport, 1, &scissor); - - const vk::PipelineRasterizationStateCreateInfo rasterizer( - {}, false, false, vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone, - vk::FrontFace::eClockwise, false, 0.0f, 0.0f, 0.0f, 1.0f); - - const vk::PipelineMultisampleStateCreateInfo multisampling({}, vk::SampleCountFlagBits::e1, - false, 0.0f, nullptr, false, false); - - const vk::PipelineColorBlendAttachmentState color_blend_attachment( - false, vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd, - vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd, - vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); - - const vk::PipelineColorBlendStateCreateInfo color_blending( - {}, false, vk::LogicOp::eCopy, 1, &color_blend_attachment, {0.0f, 0.0f, 0.0f, 0.0f}); - - const std::array<vk::DynamicState, 2> dynamic_states = {vk::DynamicState::eViewport, - vk::DynamicState::eScissor}; - - const vk::PipelineDynamicStateCreateInfo dynamic_state( - {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data()); - - const vk::GraphicsPipelineCreateInfo pipeline_ci( - {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input, - &input_assembly, nullptr, &viewport_state, &rasterizer, &multisampling, nullptr, - &color_blending, &dynamic_state, *pipeline_layout, *renderpass, 0, nullptr, 0); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - pipeline = dev.createGraphicsPipelineUnique({}, pipeline_ci, nullptr, dld); + VkPipelineVertexInputStateCreateInfo vertex_input_ci; + vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_ci.pNext = nullptr; + vertex_input_ci.flags = 0; + vertex_input_ci.vertexBindingDescriptionCount = 1; + vertex_input_ci.pVertexBindingDescriptions = &vertex_binding_description; + vertex_input_ci.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}; + vertex_input_ci.pVertexAttributeDescriptions = vertex_attrs_description.data(); + + VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; + input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_ci.pNext = nullptr; + input_assembly_ci.flags = 0; + input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + input_assembly_ci.primitiveRestartEnable = VK_FALSE; + + VkPipelineViewportStateCreateInfo viewport_state_ci; + viewport_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state_ci.pNext = nullptr; + viewport_state_ci.flags = 0; + viewport_state_ci.viewportCount = 1; + viewport_state_ci.pViewports = nullptr; + viewport_state_ci.scissorCount = 1; + viewport_state_ci.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_ci; + rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_ci.pNext = nullptr; + rasterization_ci.flags = 0; + rasterization_ci.depthClampEnable = VK_FALSE; + rasterization_ci.rasterizerDiscardEnable = VK_FALSE; + rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_ci.cullMode = VK_CULL_MODE_NONE; + rasterization_ci.frontFace = VK_FRONT_FACE_CLOCKWISE; + rasterization_ci.depthBiasEnable = VK_FALSE; + rasterization_ci.depthBiasConstantFactor = 0.0f; + rasterization_ci.depthBiasClamp = 0.0f; + rasterization_ci.depthBiasSlopeFactor = 0.0f; + rasterization_ci.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisampling_ci; + multisampling_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisampling_ci.pNext = nullptr; + multisampling_ci.flags = 0; + multisampling_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisampling_ci.sampleShadingEnable = VK_FALSE; + multisampling_ci.minSampleShading = 0.0f; + multisampling_ci.pSampleMask = nullptr; + multisampling_ci.alphaToCoverageEnable = VK_FALSE; + multisampling_ci.alphaToOneEnable = VK_FALSE; + + VkPipelineColorBlendAttachmentState color_blend_attachment; + color_blend_attachment.blendEnable = VK_FALSE; + color_blend_attachment.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment.colorBlendOp = VK_BLEND_OP_ADD; + color_blend_attachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment.alphaBlendOp = VK_BLEND_OP_ADD; + color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo color_blend_ci; + color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_ci.flags = 0; + color_blend_ci.pNext = nullptr; + color_blend_ci.logicOpEnable = VK_FALSE; + color_blend_ci.logicOp = VK_LOGIC_OP_COPY; + color_blend_ci.attachmentCount = 1; + color_blend_ci.pAttachments = &color_blend_attachment; + color_blend_ci.blendConstants[0] = 0.0f; + color_blend_ci.blendConstants[1] = 0.0f; + color_blend_ci.blendConstants[2] = 0.0f; + color_blend_ci.blendConstants[3] = 0.0f; + + static constexpr std::array dynamic_states = {VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamic_state_ci; + dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_ci.pNext = nullptr; + dynamic_state_ci.flags = 0; + dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); + dynamic_state_ci.pDynamicStates = dynamic_states.data(); + + VkGraphicsPipelineCreateInfo pipeline_ci; + pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_ci.pNext = nullptr; + pipeline_ci.flags = 0; + pipeline_ci.stageCount = static_cast<u32>(shader_stages.size()); + pipeline_ci.pStages = shader_stages.data(); + pipeline_ci.pVertexInputState = &vertex_input_ci; + pipeline_ci.pInputAssemblyState = &input_assembly_ci; + pipeline_ci.pTessellationState = nullptr; + pipeline_ci.pViewportState = &viewport_state_ci; + pipeline_ci.pRasterizationState = &rasterization_ci; + pipeline_ci.pMultisampleState = &multisampling_ci; + pipeline_ci.pDepthStencilState = nullptr; + pipeline_ci.pColorBlendState = &color_blend_ci; + pipeline_ci.pDynamicState = &dynamic_state_ci; + pipeline_ci.layout = *pipeline_layout; + pipeline_ci.renderPass = *renderpass; + pipeline_ci.subpass = 0; + pipeline_ci.basePipelineHandle = 0; + pipeline_ci.basePipelineIndex = 0; + + pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); } void VKBlitScreen::CreateSampler() { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const vk::SamplerCreateInfo sampler_ci( - {}, vk::Filter::eLinear, vk::Filter::eLinear, vk::SamplerMipmapMode::eLinear, - vk::SamplerAddressMode::eClampToBorder, vk::SamplerAddressMode::eClampToBorder, - vk::SamplerAddressMode::eClampToBorder, 0.0f, false, 0.0f, false, vk::CompareOp::eNever, - 0.0f, 0.0f, vk::BorderColor::eFloatOpaqueBlack, false); - sampler = dev.createSamplerUnique(sampler_ci, nullptr, dld); + VkSamplerCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.magFilter = VK_FILTER_LINEAR; + ci.minFilter = VK_FILTER_NEAREST; + ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + ci.mipLodBias = 0.0f; + ci.anisotropyEnable = VK_FALSE; + ci.maxAnisotropy = 0.0f; + ci.compareEnable = VK_FALSE; + ci.compareOp = VK_COMPARE_OP_NEVER; + ci.minLod = 0.0f; + ci.maxLod = 0.0f; + ci.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + ci.unnormalizedCoordinates = VK_FALSE; + + sampler = device.GetLogical().CreateSampler(ci); } void VKBlitScreen::CreateFramebuffers() { - const vk::Extent2D size{swapchain.GetSize()}; - framebuffers.clear(); + const VkExtent2D size{swapchain.GetSize()}; framebuffers.resize(image_count); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); + VkFramebufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.renderPass = *renderpass; + ci.attachmentCount = 1; + ci.width = size.width; + ci.height = size.height; + ci.layers = 1; for (std::size_t i = 0; i < image_count; ++i) { - const vk::ImageView image_view{swapchain.GetImageViewIndex(i)}; - const vk::FramebufferCreateInfo framebuffer_ci({}, *renderpass, 1, &image_view, size.width, - size.height, 1); - framebuffers[i] = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); + const VkImageView image_view{swapchain.GetImageViewIndex(i)}; + ci.pAttachments = &image_view; + framebuffers[i] = device.GetLogical().CreateFramebuffer(ci); } } @@ -507,54 +678,86 @@ void VKBlitScreen::ReleaseRawImages() { } void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - - const vk::BufferCreateInfo buffer_ci({}, CalculateBufferSize(framebuffer), - vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eUniformBuffer, - vk::SharingMode::eExclusive, 0, nullptr); - buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); - buffer_commit = memory_manager.Commit(*buffer, true); + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = CalculateBufferSize(framebuffer); + ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + + buffer = device.GetLogical().CreateBuffer(ci); + buffer_commit = memory_manager.Commit(buffer, true); } void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { raw_images.resize(image_count); raw_buffer_commits.resize(image_count); - const auto format = GetFormat(framebuffer); + VkImageCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.imageType = VK_IMAGE_TYPE_2D; + ci.format = GetFormat(framebuffer); + ci.extent.width = framebuffer.width; + ci.extent.height = framebuffer.height; + ci.extent.depth = 1; + ci.mipLevels = 1; + ci.arrayLayers = 1; + ci.samples = VK_SAMPLE_COUNT_1_BIT; + ci.tiling = VK_IMAGE_TILING_LINEAR; + ci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + for (std::size_t i = 0; i < image_count; ++i) { - const vk::ImageCreateInfo image_ci( - {}, vk::ImageType::e2D, format, {framebuffer.width, framebuffer.height, 1}, 1, 1, - vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal, - vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, - vk::SharingMode::eExclusive, 0, nullptr, vk::ImageLayout::eUndefined); - - raw_images[i] = - std::make_unique<VKImage>(device, scheduler, image_ci, vk::ImageAspectFlagBits::eColor); + raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); } } -void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const { - const vk::DescriptorSet descriptor_set = descriptor_sets[image_index]; - - const vk::DescriptorBufferInfo buffer_info(*buffer, offsetof(BufferData, uniform), - sizeof(BufferData::uniform)); - const vk::WriteDescriptorSet ubo_write(descriptor_set, 0, 0, 1, - vk::DescriptorType::eUniformBuffer, nullptr, - &buffer_info, nullptr); - - const vk::DescriptorImageInfo image_info(*sampler, image_view, - vk::ImageLayout::eShaderReadOnlyOptimal); - const vk::WriteDescriptorSet sampler_write(descriptor_set, 1, 0, 1, - vk::DescriptorType::eCombinedImageSampler, - &image_info, nullptr, nullptr); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - dev.updateDescriptorSets({ubo_write, sampler_write}, {}, dld); +void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { + VkDescriptorBufferInfo buffer_info; + buffer_info.buffer = *buffer; + buffer_info.offset = offsetof(BufferData, uniform); + buffer_info.range = sizeof(BufferData::uniform); + + VkWriteDescriptorSet ubo_write; + ubo_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + ubo_write.pNext = nullptr; + ubo_write.dstSet = descriptor_sets[image_index]; + ubo_write.dstBinding = 0; + ubo_write.dstArrayElement = 0; + ubo_write.descriptorCount = 1; + ubo_write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + ubo_write.pImageInfo = nullptr; + ubo_write.pBufferInfo = &buffer_info; + ubo_write.pTexelBufferView = nullptr; + + VkDescriptorImageInfo image_info; + image_info.sampler = *sampler; + image_info.imageView = image_view; + image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + VkWriteDescriptorSet sampler_write; + sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + sampler_write.pNext = nullptr; + sampler_write.dstSet = descriptor_sets[image_index]; + sampler_write.dstBinding = 1; + sampler_write.dstArrayElement = 0; + sampler_write.descriptorCount = 1; + sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + sampler_write.pImageInfo = &image_info; + sampler_write.pBufferInfo = nullptr; + sampler_write.pTexelBufferView = nullptr; + + device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); } void VKBlitScreen::SetUniformData(BufferData& data, diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index ea680b3f5..5eb544aea 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -8,9 +8,9 @@ #include <memory> #include <tuple> -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; @@ -49,8 +49,8 @@ public: void Recreate(); - std::tuple<VKFence&, vk::Semaphore> Draw(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated); + std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated); private: struct BufferData; @@ -74,7 +74,7 @@ private: void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - void UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const; + void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; @@ -93,23 +93,23 @@ private: const std::size_t image_count; const VKScreenInfo& screen_info; - UniqueShaderModule vertex_shader; - UniqueShaderModule fragment_shader; - UniqueDescriptorPool descriptor_pool; - UniqueDescriptorSetLayout descriptor_set_layout; - UniquePipelineLayout pipeline_layout; - UniquePipeline pipeline; - UniqueRenderPass renderpass; - std::vector<UniqueFramebuffer> framebuffers; - std::vector<vk::DescriptorSet> descriptor_sets; - UniqueSampler sampler; - - UniqueBuffer buffer; + vk::ShaderModule vertex_shader; + vk::ShaderModule fragment_shader; + vk::DescriptorPool descriptor_pool; + vk::DescriptorSetLayout descriptor_set_layout; + vk::PipelineLayout pipeline_layout; + vk::Pipeline pipeline; + vk::RenderPass renderpass; + std::vector<vk::Framebuffer> framebuffers; + vk::DescriptorSets descriptor_sets; + vk::Sampler sampler; + + vk::Buffer buffer; VKMemoryCommit buffer_commit; std::vector<std::unique_ptr<VKFenceWatch>> watches; - std::vector<UniqueSemaphore> semaphores; + std::vector<vk::Semaphore> semaphores; std::vector<std::unique_ptr<VKImage>> raw_images; std::vector<VKMemoryCommit> raw_buffer_commits; u32 raw_width = 0; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 326d74f29..81e1de2be 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -11,32 +11,31 @@ #include "common/assert.h" #include "common/bit_util.h" #include "core/core.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { namespace { -const auto BufferUsage = - vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; +constexpr VkBufferUsageFlags BUFFER_USAGE = + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; -const auto UploadPipelineStage = - vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | - vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | - vk::PipelineStageFlagBits::eComputeShader; +constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = + VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; -const auto UploadAccessBarriers = - vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | - vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | - vk::AccessFlagBits::eIndexRead; +constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; -auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { - return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage); +std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { + return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE); } } // Anonymous namespace @@ -44,15 +43,18 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { - const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), - BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eTransferDst, - vk::SharingMode::eExclusive, 0, nullptr); - - const auto& dld{device.GetDispatchLoader()}; - const auto dev{device.GetLogical()}; - buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); - buffer.commit = memory_manager.Commit(*buffer.handle, false); + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = static_cast<VkDeviceSize>(size); + ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + + buffer.handle = device.GetLogical().CreateBuffer(ci); + buffer.commit = memory_manager.Commit(buffer.handle, false); } CachedBufferBlock::~CachedBufferBlock() = default; @@ -60,9 +62,9 @@ CachedBufferBlock::~CachedBufferBlock() = default; VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system, - CreateStreamBuffer(device, - scheduler)}, + : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ staging_pool} {} @@ -72,18 +74,18 @@ Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); } -const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { +VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) { return buffer->GetHandle(); } -const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { +VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { size = std::max(size, std::size_t(4)); const auto& empty = staging_pool.GetUnusedBuffer(size, false); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { - cmdbuf.fillBuffer(buffer, 0, size, 0, dld); + scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { + cmdbuf.FillBuffer(buffer, 0, size, 0); }); - return &*empty.handle; + return *empty.handle; } void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, @@ -92,15 +94,22 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st std::memcpy(staging.commit->Map(size), data, size); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, - size](auto cmdbuf, auto& dld) { - cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); - cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, - {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, - offset, size)}, - {}, dld); + scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, + size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = offset; + barrier.size = size; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, + barrier, {}); }); } @@ -108,17 +117,24 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, u8* data) { const auto& staging = staging_pool.GetUnusedBuffer(size, true); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, - size](auto cmdbuf, auto& dld) { - cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | - vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer, {}, {}, - {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, - vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, - {}, dld); - cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); + scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, + size](vk::CommandBuffer cmdbuf) { + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = offset; + barrier.size = size; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); + cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); }); scheduler.Finish(); @@ -128,18 +144,31 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) { scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, - dst_offset, size](auto cmdbuf, auto& dld) { - cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); - cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, - {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, - vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), - vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, - dst_offset, size)}, - {}, dld); + scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, + dst_offset, size](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); + + std::array<VkBufferMemoryBarrier, 2> barriers; + barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barriers[0].pNext = nullptr; + barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].buffer = src_buffer; + barriers[0].offset = src_offset; + barriers[0].size = size; + barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barriers[1].pNext = nullptr; + barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].buffer = dst_buffer; + barriers[1].offset = dst_offset; + barriers[1].size = size; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, + barriers, {}); }); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 508214618..3cd2e2774 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -11,11 +11,11 @@ #include "common/common_types.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; @@ -33,8 +33,8 @@ public: VAddr cpu_addr, std::size_t size); ~CachedBufferBlock(); - const vk::Buffer* GetHandle() const { - return &*buffer.handle; + VkBuffer GetHandle() const { + return *buffer.handle; } private: @@ -43,22 +43,22 @@ private: using Buffer = std::shared_ptr<CachedBufferBlock>; -class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> { +class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { public: explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - const vk::Buffer* GetEmptyBuffer(std::size_t size) override; + VkBuffer GetEmptyBuffer(std::size_t size) override; protected: + VkBuffer ToHandle(const Buffer& buffer) override; + void WriteBarrier() override {} Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; - const vk::Buffer* ToHandle(const Buffer& buffer) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7bdda3d79..7b0268033 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,13 +10,13 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -114,6 +114,35 @@ constexpr u8 quad_array[] = { 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; +VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { + VkDescriptorSetLayoutBinding binding; + binding.binding = 0; + binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + binding.pImmutableSamplers = nullptr; + return binding; +} + +VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { + VkDescriptorUpdateTemplateEntryKHR entry; + entry.dstBinding = 0; + entry.dstArrayElement = 0; + entry.descriptorCount = 1; + entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + entry.offset = 0; + entry.stride = sizeof(DescriptorUpdateEntry); + return entry; +} + +VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { + VkPushConstantRange range; + range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + range.offset = 0; + range.size = static_cast<u32>(size); + return range; +} + // Uint8 SPIR-V module. Generated from the "shaders/" directory. constexpr u8 uint8_pass[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, @@ -191,53 +220,234 @@ constexpr u8 uint8_pass[] = { 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; +// Quad indexed SPIR-V module. Generated from the "shaders/" directory. +constexpr u8 QUAD_INDEXED_SPV[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, + 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, + 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, + 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, + 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, + 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, + 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, + 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, + 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, + 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, + 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, + 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, + 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, + 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, + 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, + 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, + 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, + 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, + 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + +std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { + std::array<VkDescriptorSetLayoutBinding, 2> bindings; + bindings[0].binding = 0; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[0].descriptorCount = 1; + bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[0].pImmutableSamplers = nullptr; + bindings[1].binding = 1; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[1].descriptorCount = 1; + bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + bindings[1].pImmutableSamplers = nullptr; + return bindings; +} + +VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { + VkDescriptorUpdateTemplateEntryKHR entry; + entry.dstBinding = 0; + entry.dstArrayElement = 0; + entry.descriptorCount = 2; + entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + entry.offset = 0; + entry.stride = sizeof(DescriptorUpdateEntry); + return entry; +} + } // Anonymous namespace VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, - const std::vector<vk::DescriptorSetLayoutBinding>& bindings, - const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, - const std::vector<vk::PushConstantRange> push_constants, - std::size_t code_size, const u8* code) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - - const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci( - {}, static_cast<u32>(bindings.size()), bindings.data()); - descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld); - - const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, - static_cast<u32>(push_constants.size()), - push_constants.data()); - layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); + vk::Span<VkDescriptorSetLayoutBinding> bindings, + vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, + vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, + const u8* code) { + VkDescriptorSetLayoutCreateInfo descriptor_layout_ci; + descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_layout_ci.pNext = nullptr; + descriptor_layout_ci.flags = 0; + descriptor_layout_ci.bindingCount = bindings.size(); + descriptor_layout_ci.pBindings = bindings.data(); + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci); + + VkPipelineLayoutCreateInfo pipeline_layout_ci; + pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_ci.pNext = nullptr; + pipeline_layout_ci.flags = 0; + pipeline_layout_ci.setLayoutCount = 1; + pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address(); + pipeline_layout_ci.pushConstantRangeCount = push_constants.size(); + pipeline_layout_ci.pPushConstantRanges = push_constants.data(); + layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci); if (!templates.empty()) { - const vk::DescriptorUpdateTemplateCreateInfo template_ci( - {}, static_cast<u32>(templates.size()), templates.data(), - vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, - vk::PipelineBindPoint::eGraphics, *layout, 0); - descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); + VkDescriptorUpdateTemplateCreateInfoKHR template_ci; + template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; + template_ci.pNext = nullptr; + template_ci.flags = 0; + template_ci.descriptorUpdateEntryCount = templates.size(); + template_ci.pDescriptorUpdateEntries = templates.data(); + template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + template_ci.descriptorSetLayout = *descriptor_set_layout; + template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + template_ci.pipelineLayout = *layout; + template_ci.set = 0; + descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci); descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); std::memcpy(code_copy.get(), code, code_size); - const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get()); - module = dev.createShaderModuleUnique(module_ci, nullptr, dld); - const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module, - "main", nullptr); + VkShaderModuleCreateInfo module_ci; + module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + module_ci.pNext = nullptr; + module_ci.flags = 0; + module_ci.codeSize = code_size; + module_ci.pCode = code_copy.get(); + module = device.GetLogical().CreateShaderModule(module_ci); + + VkComputePipelineCreateInfo pipeline_ci; + pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline_ci.pNext = nullptr; + pipeline_ci.flags = 0; + pipeline_ci.layout = *layout; + pipeline_ci.basePipelineHandle = nullptr; + pipeline_ci.basePipelineIndex = 0; - const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0); - pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld); + VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage; + stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage_ci.pNext = nullptr; + stage_ci.flags = 0; + stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; + stage_ci.module = *module; + stage_ci.pName = "main"; + stage_ci.pSpecializationInfo = nullptr; + + pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); } VKComputePass::~VKComputePass() = default; -vk::DescriptorSet VKComputePass::CommitDescriptorSet( - VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) { +VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, + VKFence& fence) { if (!descriptor_template) { - return {}; + return nullptr; } const auto set = descriptor_allocator->Commit(fence); update_descriptor_queue.Send(*descriptor_template, set); @@ -248,25 +458,21 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, - {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1, - vk::ShaderStageFlagBits::eCompute, nullptr)}, - {vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer, - 0, sizeof(DescriptorUpdateEntry))}, - {vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))}, - std::size(quad_array), quad_array), + : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), + BuildQuadArrayPassDescriptorUpdateTemplateEntry(), + BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, update_descriptor_queue{update_descriptor_queue} {} QuadArrayPass::~QuadArrayPass() = default; -std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { - const u32 num_triangle_vertices = num_vertices * 6 / 4; +std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { + const u32 num_triangle_vertices = (num_vertices / 4) * 6; const std::size_t staging_size = num_triangle_vertices * sizeof(u32); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); scheduler.RequestOutsideRenderPassOperationContext(); @@ -274,20 +480,25 @@ std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_ver ASSERT(num_vertices % 4 == 0); const u32 num_quads = num_vertices / 4; scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads, - first, set](auto cmdbuf, auto& dld) { + first, set](vk::CommandBuffer cmdbuf) { constexpr u32 dispatch_size = 1024; - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); - cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first, - dld); - cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld); - - const vk::BufferMemoryBarrier barrier( - vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, - static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32)); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first); + cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = 0; + barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); }); return {*buffer.handle, 0}; } @@ -295,45 +506,112 @@ std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_ver Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, - {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1, - vk::ShaderStageFlagBits::eCompute, nullptr), - vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1, - vk::ShaderStageFlagBits::eCompute, nullptr)}, - {vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer, - 0, sizeof(DescriptorUpdateEntry))}, - {}, std::size(uint8_pass), uint8_pass), + : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), + BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), + uint8_pass), scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, update_descriptor_queue{update_descriptor_queue} {} Uint8Pass::~Uint8Pass() = default; -std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer, - u64 src_offset) { +std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, + u64 src_offset) { const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices); - update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size); + update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, - num_vertices](auto cmdbuf, auto& dld) { + num_vertices](vk::CommandBuffer cmdbuf) { constexpr u32 dispatch_size = 1024; - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); - cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld); - - const vk::BufferMemoryBarrier barrier( - vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, - static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16)); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = 0; + barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); }); - return {&*buffer.handle, 0}; + return {*buffer.handle, 0}; +} + +QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKStagingBufferPool& staging_buffer_pool, + VKUpdateDescriptorQueue& update_descriptor_queue) + : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), + BuildInputOutputDescriptorUpdateTemplate(), + BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), + QUAD_INDEXED_SPV), + scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, + update_descriptor_queue{update_descriptor_queue} {} + +QuadIndexedPass::~QuadIndexedPass() = default; + +std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( + Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, + VkBuffer src_buffer, u64 src_offset) { + const u32 index_shift = [index_format] { + switch (index_format) { + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: + return 0; + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort: + return 1; + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt: + return 2; + } + UNREACHABLE(); + return 2; + }(); + const u32 input_size = num_vertices << index_shift; + const u32 num_tri_vertices = (num_vertices / 4) * 6; + + const std::size_t staging_size = num_tri_vertices * sizeof(u32); + auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); + + update_descriptor_queue.Acquire(); + update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); + const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, + num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + static constexpr u32 dispatch_size = 1024; + const std::array push_constants = {base_vertex, index_shift}; + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + &push_constants); + cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = 0; + barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + }); + return {*buffer.handle, 0}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 7057eb837..26bf834de 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -8,8 +8,9 @@ #include <utility> #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -22,24 +23,24 @@ class VKUpdateDescriptorQueue; class VKComputePass { public: explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, - const std::vector<vk::DescriptorSetLayoutBinding>& bindings, - const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, - const std::vector<vk::PushConstantRange> push_constants, - std::size_t code_size, const u8* code); + vk::Span<VkDescriptorSetLayoutBinding> bindings, + vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, + vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, + const u8* code); ~VKComputePass(); protected: - vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, - VKFence& fence); + VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, + VKFence& fence); - UniqueDescriptorUpdateTemplate descriptor_template; - UniquePipelineLayout layout; - UniquePipeline pipeline; + vk::DescriptorUpdateTemplateKHR descriptor_template; + vk::PipelineLayout layout; + vk::Pipeline pipeline; private: - UniqueDescriptorSetLayout descriptor_set_layout; + vk::DescriptorSetLayout descriptor_set_layout; std::optional<DescriptorAllocator> descriptor_allocator; - UniqueShaderModule module; + vk::ShaderModule module; }; class QuadArrayPass final : public VKComputePass { @@ -50,7 +51,7 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~QuadArrayPass(); - std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first); + std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); private: VKScheduler& scheduler; @@ -65,8 +66,25 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~Uint8Pass(); - std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer, - u64 src_offset); + std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); + +private: + VKScheduler& scheduler; + VKStagingBufferPool& staging_buffer_pool; + VKUpdateDescriptorQueue& update_descriptor_queue; +}; + +class QuadIndexedPass final : public VKComputePass { +public: + explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKStagingBufferPool& staging_buffer_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~QuadIndexedPass(); + + std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, + u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, + u64 src_offset); private: VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 60f57d83e..52566bb79 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -5,7 +5,6 @@ #include <memory> #include <vector> -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" @@ -14,6 +13,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -30,7 +30,7 @@ VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& schedu VKComputePipeline::~VKComputePipeline() = default; -vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { +VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { if (!descriptor_template) { return {}; } @@ -39,74 +39,111 @@ vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { return set; } -UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector<vk::DescriptorSetLayoutBinding> bindings; +vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { + std::vector<VkDescriptorSetLayoutBinding> bindings; u32 binding = 0; - const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { + const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { // TODO(Rodrigo): Maybe make individual bindings here? for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute, - nullptr); + VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); + entry.binding = binding++; + entry.descriptorType = descriptor_type; + entry.descriptorCount = 1; + entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + entry.pImmutableSamplers = nullptr; } }; - AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); - AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); - AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); - AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); - AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); - - const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( - {}, static_cast<u32>(bindings.size()), bindings.data()); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); + add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); + add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); + + VkDescriptorSetLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.bindingCount = static_cast<u32>(bindings.size()); + ci.pBindings = bindings.data(); + return device.GetLogical().CreateDescriptorSetLayout(ci); } -UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const { - const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr); - const auto dev = device.GetLogical(); - return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader()); +vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { + VkPipelineLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.setLayoutCount = 1; + ci.pSetLayouts = descriptor_set_layout.address(); + ci.pushConstantRangeCount = 0; + ci.pPushConstantRanges = nullptr; + return device.GetLogical().CreatePipelineLayout(ci); } -UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; +vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { + std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; u32 binding = 0; u32 offset = 0; FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); if (template_entries.empty()) { // If the shader doesn't use descriptor sets, skip template creation. - return UniqueDescriptorUpdateTemplate{}; + return {}; } - const vk::DescriptorUpdateTemplateCreateInfo template_ci( - {}, static_cast<u32>(template_entries.size()), template_entries.data(), - vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, - vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); + VkDescriptorUpdateTemplateCreateInfoKHR ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; + ci.pNext = nullptr; + ci.flags = 0; + ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); + ci.pDescriptorUpdateEntries = template_entries.data(); + ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + ci.descriptorSetLayout = *descriptor_set_layout; + ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + ci.pipelineLayout = *layout; + ci.set = DESCRIPTOR_SET; + return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); } -UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { - const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data()); - const auto dev = device.GetLogical(); - return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader()); +vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { + device.SaveShader(code); + + VkShaderModuleCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.codeSize = code.size() * sizeof(u32); + ci.pCode = code.data(); + return device.GetLogical().CreateShaderModule(ci); } -UniquePipeline VKComputePipeline::CreatePipeline() const { - vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute, - *shader_module, "main", nullptr); - vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; +vk::Pipeline VKComputePipeline::CreatePipeline() const { + VkComputePipelineCreateInfo ci; + VkPipelineShaderStageCreateInfo& stage_ci = ci.stage; + stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage_ci.pNext = nullptr; + stage_ci.flags = 0; + stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; + stage_ci.module = *shader_module; + stage_ci.pName = "main"; + stage_ci.pSpecializationInfo = nullptr; + + VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; + subgroup_size_ci.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; + subgroup_size_ci.pNext = nullptr; subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; - if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) { - shader_stage_ci.pNext = &subgroup_size_ci; + + if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { + stage_ci.pNext = &subgroup_size_ci; } - const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0); - const auto dev = device.GetLogical(); - return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader()); + ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.layout = *layout; + ci.basePipelineHandle = nullptr; + ci.basePipelineIndex = 0; + return device.GetLogical().CreateComputePipeline(ci); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 22235c6c9..33b9af29e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -7,9 +7,9 @@ #include <memory> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -25,42 +25,42 @@ public: const SPIRVShader& shader); ~VKComputePipeline(); - vk::DescriptorSet CommitDescriptorSet(); + VkDescriptorSet CommitDescriptorSet(); - vk::Pipeline GetHandle() const { + VkPipeline GetHandle() const { return *pipeline; } - vk::PipelineLayout GetLayout() const { + VkPipelineLayout GetLayout() const { return *layout; } - const ShaderEntries& GetEntries() { + const ShaderEntries& GetEntries() const { return entries; } private: - UniqueDescriptorSetLayout CreateDescriptorSetLayout() const; + vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - UniquePipelineLayout CreatePipelineLayout() const; + vk::PipelineLayout CreatePipelineLayout() const; - UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const; + vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const; + vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; - UniquePipeline CreatePipeline() const; + vk::Pipeline CreatePipeline() const; const VKDevice& device; VKScheduler& scheduler; ShaderEntries entries; - UniqueDescriptorSetLayout descriptor_set_layout; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; VKUpdateDescriptorQueue& update_descriptor_queue; - UniquePipelineLayout layout; - UniqueDescriptorUpdateTemplate descriptor_template; - UniqueShaderModule shader_module; - UniquePipeline pipeline; + vk::PipelineLayout layout; + vk::DescriptorUpdateTemplateKHR descriptor_template; + vk::ShaderModule shader_module; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index cc7c281a0..e9d528aa6 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -6,10 +6,10 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -17,19 +17,18 @@ namespace Vulkan { constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, - vk::DescriptorSetLayout layout) + VkDescriptorSetLayout layout) : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} DescriptorAllocator::~DescriptorAllocator() = default; -vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) { - return *descriptors[CommitResource(fence)]; +VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) { + const std::size_t index = CommitResource(fence); + return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; } void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin); - descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()), - std::make_move_iterator(new_sets.end())); + descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const VKDevice& device) @@ -37,53 +36,50 @@ VKDescriptorPool::VKDescriptorPool(const VKDevice& device) VKDescriptorPool::~VKDescriptorPool() = default; -vk::DescriptorPool VKDescriptorPool::AllocateNewPool() { +vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { static constexpr u32 num_sets = 0x20000; - static constexpr vk::DescriptorPoolSize pool_sizes[] = { - {vk::DescriptorType::eUniformBuffer, num_sets * 90}, - {vk::DescriptorType::eStorageBuffer, num_sets * 60}, - {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64}, - {vk::DescriptorType::eCombinedImageSampler, num_sets * 64}, - {vk::DescriptorType::eStorageImage, num_sets * 40}}; - - const vk::DescriptorPoolCreateInfo create_info( - vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets, - static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes)); - const auto dev = device.GetLogical(); - return *pools.emplace_back( - dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader())); + static constexpr VkDescriptorPoolSize pool_sizes[] = { + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, + {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; + + VkDescriptorPoolCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + ci.maxSets = num_sets; + ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes)); + ci.pPoolSizes = std::data(pool_sizes); + return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); } -std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors( - vk::DescriptorSetLayout layout, std::size_t count) { - std::vector layout_copies(count, layout); - vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count), - layout_copies.data()); - - std::vector<vk::DescriptorSet> sets(count); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) { - case vk::Result::eSuccess: - break; - case vk::Result::eErrorOutOfPoolMemory: - active_pool = AllocateNewPool(); - allocate_info.descriptorPool = active_pool; - if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) { - break; - } - [[fallthrough]]; - default: - vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique"); +vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, + std::size_t count) { + const std::vector layout_copies(count, layout); + VkDescriptorSetAllocateInfo ai; + ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + ai.pNext = nullptr; + ai.descriptorPool = **active_pool; + ai.descriptorSetCount = static_cast<u32>(count); + ai.pSetLayouts = layout_copies.data(); + + vk::DescriptorSets sets = active_pool->Allocate(ai); + if (!sets.IsOutOfPoolMemory()) { + return sets; } - vk::PoolFree deleter(dev, active_pool, dld); - std::vector<UniqueDescriptorSet> unique_sets; - unique_sets.reserve(count); - for (const auto set : sets) { - unique_sets.push_back(UniqueDescriptorSet{set, deleter}); + // Our current pool is out of memory. Allocate a new one and retry + active_pool = AllocateNewPool(); + ai.descriptorPool = **active_pool; + sets = active_pool->Allocate(ai); + if (!sets.IsOutOfPoolMemory()) { + return sets; } - return unique_sets; + + // After allocating a new pool, we are out of memory again. We can't handle this from here. + throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index a441dbc0f..ab40c70f0 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -8,8 +8,8 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -17,21 +17,21 @@ class VKDescriptorPool; class DescriptorAllocator final : public VKFencedPool { public: - explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout); + explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); ~DescriptorAllocator() override; DescriptorAllocator(const DescriptorAllocator&) = delete; - vk::DescriptorSet Commit(VKFence& fence); + VkDescriptorSet Commit(VKFence& fence); protected: void Allocate(std::size_t begin, std::size_t end) override; private: VKDescriptorPool& descriptor_pool; - const vk::DescriptorSetLayout layout; + const VkDescriptorSetLayout layout; - std::vector<UniqueDescriptorSet> descriptors; + std::vector<vk::DescriptorSets> descriptors_allocations; }; class VKDescriptorPool final { @@ -42,15 +42,14 @@ public: ~VKDescriptorPool(); private: - vk::DescriptorPool AllocateNewPool(); + vk::DescriptorPool* AllocateNewPool(); - std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout, - std::size_t count); + vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); const VKDevice& device; - std::vector<UniqueDescriptorPool> pools; - vk::DescriptorPool active_pool; + std::vector<vk::DescriptorPool> pools; + vk::DescriptorPool* active_pool; }; } // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 6f4ae9132..e90c76492 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -6,15 +6,16 @@ #include <chrono> #include <cstdlib> #include <optional> -#include <set> #include <string_view> #include <thread> +#include <unordered_set> +#include <utility> #include <vector> #include "common/assert.h" #include "core/settings.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -22,49 +23,43 @@ namespace { namespace Alternatives { -constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint, - vk::Format::eD16UnormS8Uint, vk::Format{}}; -constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint, - vk::Format::eD32SfloatS8Uint, vk::Format{}}; +constexpr std::array Depth24UnormS8_UINT = {VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}}; +constexpr std::array Depth16UnormS8_UINT = {VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, VkFormat{}}; } // namespace Alternatives +constexpr std::array REQUIRED_EXTENSIONS = { + VK_KHR_SWAPCHAIN_EXTENSION_NAME, + VK_KHR_16BIT_STORAGE_EXTENSION_NAME, + VK_KHR_8BIT_STORAGE_EXTENSION_NAME, + VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, + VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, + VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, + VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, + VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, +}; + template <typename T> void SetNext(void**& next, T& data) { *next = &data; next = &data.pNext; } -template <typename T> -T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) { - vk::PhysicalDeviceFeatures2 features; - T extension_features; - features.pNext = &extension_features; - physical.getFeatures2(&features, dld); - return extension_features; -} - -template <typename T> -T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) { - vk::PhysicalDeviceProperties2 properties; - T extension_properties; - properties.pNext = &extension_properties; - physical.getProperties2(&properties, dld); - return extension_properties; -} - -constexpr const vk::Format* GetFormatAlternatives(vk::Format format) { +constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { switch (format) { - case vk::Format::eD24UnormS8Uint: - return Alternatives::Depth24UnormS8Uint.data(); - case vk::Format::eD16UnormS8Uint: - return Alternatives::Depth16UnormS8Uint.data(); + case VK_FORMAT_D24_UNORM_S8_UINT: + return Alternatives::Depth24UnormS8_UINT.data(); + case VK_FORMAT_D16_UNORM_S8_UINT: + return Alternatives::Depth16UnormS8_UINT.data(); default: return nullptr; } } -vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) { +VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType format_type) { switch (format_type) { case FormatType::Linear: return properties.linearTilingFeatures; @@ -77,79 +72,221 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format } } +std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( + vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { + static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VK_FORMAT_A8B8G8R8_UINT_PACK32, + VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_UINT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_UINT, + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16_UNORM, + VK_FORMAT_R8G8B8A8_SRGB, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_UINT, + VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32_UINT, + VK_FORMAT_R32_SINT, + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_D16_UNORM, + VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; + std::unordered_map<VkFormat, VkFormatProperties> format_properties; + for (const auto format : formats) { + format_properties.emplace(format, physical.GetFormatProperties(format)); + } + return format_properties; +} + } // Anonymous namespace -VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical, - vk::SurfaceKHR surface) - : dld{dld}, physical{physical}, properties{physical.getProperties(dld)}, - format_properties{GetFormatProperties(dld, physical)} { +VKDevice::VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld) + : dld{dld}, physical{physical}, properties{physical.GetProperties()}, + format_properties{GetFormatProperties(physical, dld)} { SetupFamilies(surface); SetupFeatures(); } VKDevice::~VKDevice() = default; -bool VKDevice::Create(vk::Instance instance) { +bool VKDevice::Create() { const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(); - vk::PhysicalDeviceFeatures2 features2; + VkPhysicalDeviceFeatures2 features2; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + features2.pNext = nullptr; + const void* first_next = &features2; void** next = &features2.pNext; + auto& features = features2.features; - features.vertexPipelineStoresAndAtomics = true; + features.robustBufferAccess = false; + features.fullDrawIndexUint32 = false; + features.imageCubeArray = false; features.independentBlend = true; + features.geometryShader = true; + features.tessellationShader = true; + features.sampleRateShading = false; + features.dualSrcBlend = false; + features.logicOp = false; + features.multiDrawIndirect = false; + features.drawIndirectFirstInstance = false; features.depthClamp = true; - features.samplerAnisotropy = true; + features.depthBiasClamp = true; + features.fillModeNonSolid = false; + features.depthBounds = false; + features.wideLines = false; features.largePoints = true; + features.alphaToOne = false; features.multiViewport = true; - features.depthBiasClamp = true; - features.geometryShader = true; - features.tessellationShader = true; + features.samplerAnisotropy = true; + features.textureCompressionETC2 = false; + features.textureCompressionASTC_LDR = is_optimal_astc_supported; + features.textureCompressionBC = false; features.occlusionQueryPrecise = true; + features.pipelineStatisticsQuery = false; + features.vertexPipelineStoresAndAtomics = true; features.fragmentStoresAndAtomics = true; + features.shaderTessellationAndGeometryPointSize = false; features.shaderImageGatherExtended = true; + features.shaderStorageImageExtendedFormats = false; + features.shaderStorageImageMultisample = false; features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; features.shaderStorageImageWriteWithoutFormat = true; - features.textureCompressionASTC_LDR = is_optimal_astc_supported; - - vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage; + features.shaderUniformBufferArrayDynamicIndexing = false; + features.shaderSampledImageArrayDynamicIndexing = false; + features.shaderStorageBufferArrayDynamicIndexing = false; + features.shaderStorageImageArrayDynamicIndexing = false; + features.shaderClipDistance = false; + features.shaderCullDistance = false; + features.shaderFloat64 = false; + features.shaderInt64 = false; + features.shaderInt16 = false; + features.shaderResourceResidency = false; + features.shaderResourceMinLod = false; + features.sparseBinding = false; + features.sparseResidencyBuffer = false; + features.sparseResidencyImage2D = false; + features.sparseResidencyImage3D = false; + features.sparseResidency2Samples = false; + features.sparseResidency4Samples = false; + features.sparseResidency8Samples = false; + features.sparseResidency16Samples = false; + features.sparseResidencyAliased = false; + features.variableMultisampleRate = false; + features.inheritedQueries = false; + + VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage; + bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; + bit16_storage.pNext = nullptr; + bit16_storage.storageBuffer16BitAccess = false; bit16_storage.uniformAndStorageBuffer16BitAccess = true; + bit16_storage.storagePushConstant16 = false; + bit16_storage.storageInputOutput16 = false; SetNext(next, bit16_storage); - vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage; + VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage; + bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR; + bit8_storage.pNext = nullptr; + bit8_storage.storageBuffer8BitAccess = false; bit8_storage.uniformAndStorageBuffer8BitAccess = true; + bit8_storage.storagePushConstant8 = false; SetNext(next, bit8_storage); - vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; + host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT; host_query_reset.hostQueryReset = true; SetNext(next, host_query_reset); - vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { + float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; + float16_int8.pNext = nullptr; float16_int8.shaderFloat16 = true; + float16_int8.shaderInt8 = false; SetNext(next, float16_int8); } else { LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); } - vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { + std430_layout.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR; + std430_layout.pNext = nullptr; std430_layout.uniformBufferStandardLayout = true; SetNext(next, std430_layout); } else { LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); } - vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; + VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; if (ext_index_type_uint8) { + index_type_uint8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT; + index_type_uint8.pNext = nullptr; index_type_uint8.indexTypeUint8 = true; SetNext(next, index_type_uint8); } else { LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); } - vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; + VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; if (ext_transform_feedback) { + transform_feedback.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; + transform_feedback.pNext = nullptr; transform_feedback.transformFeedback = true; transform_feedback.geometryStreams = true; SetNext(next, transform_feedback); @@ -161,105 +298,101 @@ bool VKDevice::Create(vk::Instance instance) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } - vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, - nullptr, static_cast<u32>(extensions.size()), extensions.data(), - nullptr); - device_ci.pNext = &features2; + VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; + if (nv_device_diagnostics_config) { + nsight_aftermath_tracker.Initialize(); + + diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV; + diagnostics_nv.pNext = &features2; + diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV | + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV | + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV; + first_next = &diagnostics_nv; + } - vk::Device unsafe_logical; - if (physical.createDevice(&device_ci, nullptr, &unsafe_logical, dld) != vk::Result::eSuccess) { - LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); + logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); + if (!logical) { + LOG_ERROR(Render_Vulkan, "Failed to create logical device"); return false; } - dld.init(instance, dld.vkGetInstanceProcAddr, unsafe_logical); - logical = UniqueDevice(unsafe_logical, {nullptr, dld}); CollectTelemetryParameters(); - graphics_queue = logical->getQueue(graphics_family, 0, dld); - present_queue = logical->getQueue(present_family, 0, dld); + graphics_queue = logical.GetQueue(graphics_family); + present_queue = logical.GetQueue(present_family); return true; } -vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, - vk::FormatFeatureFlags wanted_usage, - FormatType format_type) const { +VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const { if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { return wanted_format; } // The wanted format is not supported by hardware, search for alternatives - const vk::Format* alternatives = GetFormatAlternatives(wanted_format); + const VkFormat* alternatives = GetFormatAlternatives(wanted_format); if (alternatives == nullptr) { UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " "hardware does not support it", - vk::to_string(wanted_format), vk::to_string(wanted_usage), - static_cast<u32>(format_type)); + wanted_format, wanted_usage, format_type); return wanted_format; } std::size_t i = 0; - for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; - alternative = alternatives[++i]) { + for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) { if (!IsFormatSupported(alternative, wanted_usage, format_type)) { continue; } LOG_WARNING(Render_Vulkan, "Emulating format={} with alternative format={} with usage={} and type={}", - static_cast<u32>(wanted_format), static_cast<u32>(alternative), - static_cast<u32>(wanted_usage), static_cast<u32>(format_type)); + wanted_format, alternative, wanted_usage, format_type); return alternative; } // No alternatives found, panic UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " "doesn't support any of the alternatives", - static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), - static_cast<u32>(format_type)); + wanted_format, wanted_usage, format_type); return wanted_format; } void VKDevice::ReportLoss() const { LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); - // Wait some time to let the log flush - std::this_thread::sleep_for(std::chrono::seconds{1}); - - if (!nv_device_diagnostic_checkpoints) { - return; - } + // Wait for the log to flush and for Nsight Aftermath to dump the results + std::this_thread::sleep_for(std::chrono::seconds{3}); +} - [[maybe_unused]] const std::vector data = graphics_queue.getCheckpointDataNV(dld); - // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be - // executed. It can be done on a debugger by evaluating the expression: - // *(VKGraphicsPipeline*)data[0] +void VKDevice::SaveShader(const std::vector<u32>& spirv) const { + nsight_aftermath_tracker.SaveShader(spirv); } -bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const { +bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { // Disable for now to avoid converting ASTC twice. static constexpr std::array astc_formats = { - vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock, - vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock, - vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, - vk::Format::eAstc8x5UnormBlock, vk::Format::eAstc8x5SrgbBlock, - vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc8x8UnormBlock, vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc10x5UnormBlock, vk::Format::eAstc10x5SrgbBlock, - vk::Format::eAstc10x6UnormBlock, vk::Format::eAstc10x6SrgbBlock, - vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, - vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, - vk::Format::eAstc12x10UnormBlock, vk::Format::eAstc12x10SrgbBlock, - vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock}; + VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + }; if (!features.textureCompressionASTC_LDR) { return false; } const auto format_feature_usage{ - vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | - vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | - vk::FormatFeatureFlagBits::eTransferDst}; + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; for (const auto format : astc_formats) { - const auto format_properties{physical.getFormatProperties(format, dld)}; + const auto format_properties{physical.GetFormatProperties(format)}; if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { return false; } @@ -267,61 +400,49 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features return true; } -bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, +bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); if (it == format_properties.end()) { - UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format)); + UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); return true; } const auto supported_usage = GetFormatFeatures(it->second, format_type); return (supported_usage & wanted_usage) == wanted_usage; } -bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, - const vk::DispatchLoaderDynamic& dld) { - static constexpr std::array required_extensions = { - VK_KHR_SWAPCHAIN_EXTENSION_NAME, - VK_KHR_16BIT_STORAGE_EXTENSION_NAME, - VK_KHR_8BIT_STORAGE_EXTENSION_NAME, - VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, - VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, - VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, - VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, - VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, - }; +bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { bool is_suitable = true; - std::bitset<required_extensions.size()> available_extensions{}; + std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; - for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dld)) { - for (std::size_t i = 0; i < required_extensions.size(); ++i) { + for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) { + for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { if (available_extensions[i]) { continue; } - available_extensions[i] = - required_extensions[i] == std::string_view{prop.extensionName}; + const std::string_view name{prop.extensionName}; + available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; } } if (!available_extensions.all()) { - for (std::size_t i = 0; i < required_extensions.size(); ++i) { + for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { if (available_extensions[i]) { continue; } - LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]); + LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); is_suitable = false; } } bool has_graphics{}, has_present{}; - const auto queue_family_properties = physical.getQueueFamilyProperties(dld); + const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { const auto& family = queue_family_properties[i]; if (family.queueCount == 0) { continue; } - has_graphics |= - (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); - has_present |= physical.getSurfaceSupportKHR(i, surface, dld) != 0; + has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT; + has_present |= physical.GetSurfaceSupportKHR(i, surface); } if (!has_graphics || !has_present) { LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); @@ -329,7 +450,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, } // TODO(Rodrigo): Check if the device matches all requeriments. - const auto properties{physical.getProperties(dld)}; + const auto properties{physical.GetProperties()}; const auto& limits{properties.limits}; constexpr u32 required_ubo_size = 65536; @@ -346,7 +467,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, is_suitable = false; } - const auto features{physical.getFeatures(dld)}; + const auto features{physical.GetFeatures()}; const std::array feature_report = { std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.independentBlend, "independentBlend"), @@ -380,7 +501,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, std::vector<const char*> VKDevice::LoadExtensions() { std::vector<const char*> extensions; - const auto Test = [&](const vk::ExtensionProperties& extension, + const auto Test = [&](const VkExtensionProperties& extension, std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) { if (extension.extensionName != std::string_view(name)) { @@ -394,22 +515,13 @@ std::vector<const char*> VKDevice::LoadExtensions() { } }; - extensions.reserve(15); - extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); - extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); - extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); - extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME); - extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); - extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); - extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); - extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); - - [[maybe_unused]] const bool nsight = - std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); + extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); + bool has_khr_shader_float16_int8{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; - for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dld)) { + for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) { Test(extension, khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, @@ -424,43 +536,72 @@ std::vector<const char*> VKDevice::LoadExtensions() { Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { - Test(extension, nv_device_diagnostic_checkpoints, - VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true); + Test(extension, nv_device_diagnostics_config, + VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } } + VkPhysicalDeviceFeatures2KHR features; + features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + + VkPhysicalDeviceProperties2KHR properties; + properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + if (has_khr_shader_float16_int8) { - is_float16_supported = - GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dld).shaderFloat16; + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features; + float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; + float16_int8_features.pNext = nullptr; + features.pNext = &float16_int8_features; + + physical.GetFeatures2KHR(features); + is_float16_supported = float16_int8_features.shaderFloat16; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } if (has_ext_subgroup_size_control) { - const auto features = - GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dld); - const auto properties = - GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dld); - - is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize; - - if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize && - properties.maxSubgroupSize >= GuestWarpSize) { + VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; + subgroup_features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT; + subgroup_features.pNext = nullptr; + features.pNext = &subgroup_features; + physical.GetFeatures2KHR(features); + + VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties; + subgroup_properties.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT; + subgroup_properties.pNext = nullptr; + properties.pNext = &subgroup_properties; + physical.GetProperties2KHR(properties); + + is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; + + if (subgroup_features.subgroupSizeControl && + subgroup_properties.minSubgroupSize <= GuestWarpSize && + subgroup_properties.maxSubgroupSize >= GuestWarpSize) { extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); - guest_warp_stages = properties.requiredSubgroupSizeStages; + guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; } } else { is_warp_potentially_bigger = true; } if (has_ext_transform_feedback) { - const auto features = - GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dld); - const auto properties = - GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dld); - - if (features.transformFeedback && features.geometryStreams && - properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && - properties.transformFeedbackQueries && properties.transformFeedbackDraw) { + VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; + tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; + tfb_features.pNext = nullptr; + features.pNext = &tfb_features; + physical.GetFeatures2KHR(features); + + VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; + tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + tfb_properties.pNext = nullptr; + properties.pNext = &tfb_properties; + physical.GetProperties2KHR(properties); + + if (tfb_features.transformFeedback && tfb_features.geometryStreams && + tfb_properties.maxTransformFeedbackStreams >= 4 && + tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && + tfb_properties.transformFeedbackDraw) { extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); ext_transform_feedback = true; } @@ -469,10 +610,10 @@ std::vector<const char*> VKDevice::LoadExtensions() { return extensions; } -void VKDevice::SetupFamilies(vk::SurfaceKHR surface) { +void VKDevice::SetupFamilies(VkSurfaceKHR surface) { std::optional<u32> graphics_family_, present_family_; - const auto queue_family_properties = physical.getQueueFamilyProperties(dld); + const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { if (graphics_family_ && present_family_) break; @@ -481,10 +622,10 @@ void VKDevice::SetupFamilies(vk::SurfaceKHR surface) { if (queue_family.queueCount == 0) continue; - if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) { + if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { graphics_family_ = i; } - if (physical.getSurfaceSupportKHR(i, surface, dld)) { + if (physical.GetSurfaceSupportKHR(i, surface)) { present_family_ = i; } } @@ -495,120 +636,48 @@ void VKDevice::SetupFamilies(vk::SurfaceKHR surface) { } void VKDevice::SetupFeatures() { - const auto supported_features{physical.getFeatures(dld)}; + const auto supported_features{physical.GetFeatures()}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } void VKDevice::CollectTelemetryParameters() { - const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld); + VkPhysicalDeviceDriverPropertiesKHR driver; + driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR; + driver.pNext = nullptr; + + VkPhysicalDeviceProperties2KHR properties; + properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties.pNext = &driver; + physical.GetProperties2KHR(properties); + driver_id = driver.driverID; vendor_name = driver.driverName; - const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld); + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); reported_extensions.reserve(std::size(extensions)); for (const auto& extension : extensions) { reported_extensions.push_back(extension.extensionName); } } -std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { - static const float QUEUE_PRIORITY = 1.0f; +std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { + static constexpr float QUEUE_PRIORITY = 1.0f; - std::set<u32> unique_queue_families = {graphics_family, present_family}; - std::vector<vk::DeviceQueueCreateInfo> queue_cis; + std::unordered_set<u32> unique_queue_families = {graphics_family, present_family}; + std::vector<VkDeviceQueueCreateInfo> queue_cis; - for (u32 queue_family : unique_queue_families) - queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY}); + for (const u32 queue_family : unique_queue_families) { + VkDeviceQueueCreateInfo& ci = queue_cis.emplace_back(); + ci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.queueFamilyIndex = queue_family; + ci.queueCount = 1; + ci.pQueuePriorities = &QUEUE_PRIORITY; + } return queue_cis; } -std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( - const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical) { - static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, - vk::Format::eA8B8G8R8UintPack32, - vk::Format::eA8B8G8R8SnormPack32, - vk::Format::eA8B8G8R8SrgbPack32, - vk::Format::eB5G6R5UnormPack16, - vk::Format::eA2B10G10R10UnormPack32, - vk::Format::eA1R5G5B5UnormPack16, - vk::Format::eR32G32B32A32Sfloat, - vk::Format::eR32G32B32A32Uint, - vk::Format::eR32G32Sfloat, - vk::Format::eR32G32Uint, - vk::Format::eR16G16B16A16Uint, - vk::Format::eR16G16B16A16Snorm, - vk::Format::eR16G16B16A16Unorm, - vk::Format::eR16G16Unorm, - vk::Format::eR16G16Snorm, - vk::Format::eR16G16Sfloat, - vk::Format::eR16Unorm, - vk::Format::eR8G8B8A8Srgb, - vk::Format::eR8G8Unorm, - vk::Format::eR8G8Snorm, - vk::Format::eR8Unorm, - vk::Format::eR8Uint, - vk::Format::eB10G11R11UfloatPack32, - vk::Format::eR32Sfloat, - vk::Format::eR32Uint, - vk::Format::eR32Sint, - vk::Format::eR16Sfloat, - vk::Format::eR16G16B16A16Sfloat, - vk::Format::eB8G8R8A8Unorm, - vk::Format::eR4G4B4A4UnormPack16, - vk::Format::eD32Sfloat, - vk::Format::eD16Unorm, - vk::Format::eD16UnormS8Uint, - vk::Format::eD24UnormS8Uint, - vk::Format::eD32SfloatS8Uint, - vk::Format::eBc1RgbaUnormBlock, - vk::Format::eBc2UnormBlock, - vk::Format::eBc3UnormBlock, - vk::Format::eBc4UnormBlock, - vk::Format::eBc5UnormBlock, - vk::Format::eBc5SnormBlock, - vk::Format::eBc7UnormBlock, - vk::Format::eBc6HUfloatBlock, - vk::Format::eBc6HSfloatBlock, - vk::Format::eBc1RgbaSrgbBlock, - vk::Format::eBc2SrgbBlock, - vk::Format::eBc3SrgbBlock, - vk::Format::eBc7SrgbBlock, - vk::Format::eAstc4x4UnormBlock, - vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc5x4UnormBlock, - vk::Format::eAstc5x4SrgbBlock, - vk::Format::eAstc5x5UnormBlock, - vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc6x5UnormBlock, - vk::Format::eAstc6x5SrgbBlock, - vk::Format::eAstc6x6UnormBlock, - vk::Format::eAstc6x6SrgbBlock, - vk::Format::eAstc8x5UnormBlock, - vk::Format::eAstc8x5SrgbBlock, - vk::Format::eAstc8x6UnormBlock, - vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc8x8UnormBlock, - vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc10x5UnormBlock, - vk::Format::eAstc10x5SrgbBlock, - vk::Format::eAstc10x6UnormBlock, - vk::Format::eAstc10x6SrgbBlock, - vk::Format::eAstc10x8UnormBlock, - vk::Format::eAstc10x8SrgbBlock, - vk::Format::eAstc10x10UnormBlock, - vk::Format::eAstc10x10SrgbBlock, - vk::Format::eAstc12x10UnormBlock, - vk::Format::eAstc12x10SrgbBlock, - vk::Format::eAstc12x12UnormBlock, - vk::Format::eAstc12x12SrgbBlock, - vk::Format::eE5B9G9R9UfloatPack32}; - std::unordered_map<vk::Format, vk::FormatProperties> format_properties; - for (const auto format : formats) { - format_properties.emplace(format, physical.getFormatProperties(format, dld)); - } - return format_properties; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index d9d809852..a4d841e26 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -8,8 +8,10 @@ #include <string_view> #include <unordered_map> #include <vector> + #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -22,12 +24,12 @@ const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. class VKDevice final { public: - explicit VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical, - vk::SurfaceKHR surface); + explicit VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, + const vk::InstanceDispatch& dld); ~VKDevice(); /// Initializes the device. Returns true on success. - bool Create(vk::Instance instance); + bool Create(); /** * Returns a format supported by the device for the passed requeriments. @@ -36,20 +38,23 @@ public: * @param format_type Format type usage. * @returns A format supported by the device. */ - vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, - FormatType format_type) const; + VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const; /// Reports a device loss. void ReportLoss() const; + /// Reports a shader to Nsight Aftermath. + void SaveShader(const std::vector<u32>& spirv) const; + /// Returns the dispatch loader with direct function pointers of the device. - const vk::DispatchLoaderDynamic& GetDispatchLoader() const { + const vk::DeviceDispatch& GetDispatchLoader() const { return dld; } /// Returns the logical device. - vk::Device GetLogical() const { - return logical.get(); + const vk::Device& GetLogical() const { + return logical; } /// Returns the physical device. @@ -79,7 +84,7 @@ public: /// Returns true if the device is integrated with the host CPU. bool IsIntegrated() const { - return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu; + return properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; } /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. @@ -98,27 +103,27 @@ public: } /// Returns the driver ID. - vk::DriverIdKHR GetDriverID() const { + VkDriverIdKHR GetDriverID() const { return driver_id; } /// Returns uniform buffer alignment requeriment. - vk::DeviceSize GetUniformBufferAlignment() const { + VkDeviceSize GetUniformBufferAlignment() const { return properties.limits.minUniformBufferOffsetAlignment; } /// Returns storage alignment requeriment. - vk::DeviceSize GetStorageBufferAlignment() const { + VkDeviceSize GetStorageBufferAlignment() const { return properties.limits.minStorageBufferOffsetAlignment; } /// Returns the maximum range for storage buffers. - vk::DeviceSize GetMaxStorageBufferRange() const { + VkDeviceSize GetMaxStorageBufferRange() const { return properties.limits.maxStorageBufferRange; } /// Returns the maximum size for push constants. - vk::DeviceSize GetMaxPushConstantsSize() const { + VkDeviceSize GetMaxPushConstantsSize() const { return properties.limits.maxPushConstantsSize; } @@ -138,8 +143,8 @@ public: } /// Returns true if the device can be forced to use the guest warp size. - bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const { - return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; + bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { + return guest_warp_stages & stage; } /// Returns true if formatless image load is supported. @@ -172,11 +177,6 @@ public: return ext_transform_feedback; } - /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. - bool IsNvDeviceDiagnosticCheckpoints() const { - return nv_device_diagnostic_checkpoints; - } - /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return vendor_name; @@ -188,15 +188,14 @@ public: } /// Checks if the physical device is suitable. - static bool IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, - const vk::DispatchLoaderDynamic& dld); + static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); private: /// Loads extensions into a vector and stores available ones in this object. std::vector<const char*> LoadExtensions(); /// Sets up queue families. - void SetupFamilies(vk::SurfaceKHR surface); + void SetupFamilies(VkSurfaceKHR surface); /// Sets up device features. void SetupFeatures(); @@ -205,46 +204,45 @@ private: void CollectTelemetryParameters(); /// Returns a list of queue initialization descriptors. - std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; + std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. - bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const; + bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; /// Returns true if a format is supported. - bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, + bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - /// Returns the device properties for Vulkan formats. - static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( - const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical); - - vk::DispatchLoaderDynamic dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - vk::PhysicalDeviceProperties properties; ///< Device properties. - UniqueDevice logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - vk::DriverIdKHR driver_id{}; ///< Driver ID. - vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetics. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + VkDriverIdKHR driver_id{}; ///< Driver ID. + VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. + bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. /// Format properties dictionary. - std::unordered_map<vk::Format, vk::FormatProperties> format_properties; + std::unordered_map<VkFormat, VkFormatProperties> format_properties; + + /// Nsight Aftermath GPU crash tracker + NsightAftermathTracker nsight_aftermath_tracker; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6a02403c1..8332b42aa 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -2,11 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <cstring> #include <vector> + #include "common/assert.h" #include "common/common_types.h" #include "common/microprofile.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -16,6 +18,7 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -23,21 +26,27 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { -vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { - return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail), - MaxwellToVK::StencilOp(face.action_depth_pass), - MaxwellToVK::StencilOp(face.action_depth_fail), - MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0); +template <class StencilFace> +VkStencilOpState GetStencilFaceState(const StencilFace& face) { + VkStencilOpState state; + state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()); + state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()); + state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()); + state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()); + state.compareMask = 0; + state.writeMask = 0; + state.reference = 0; + return state; } -bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) { +bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { static constexpr std::array unsupported_topologies = { - vk::PrimitiveTopology::ePointList, - vk::PrimitiveTopology::eLineList, - vk::PrimitiveTopology::eTriangleList, - vk::PrimitiveTopology::eLineListWithAdjacency, - vk::PrimitiveTopology::eTriangleListWithAdjacency, - vk::PrimitiveTopology::ePatchList}; + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), topology) == std::end(unsupported_topologies); } @@ -49,7 +58,7 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache, const GraphicsPipelineCacheKey& key, - const std::vector<vk::DescriptorSetLayoutBinding>& bindings, + vk::Span<VkDescriptorSetLayoutBinding> bindings, const SPIRVProgram& program) : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, @@ -63,7 +72,7 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche VKGraphicsPipeline::~VKGraphicsPipeline() = default; -vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { +VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { if (!descriptor_template) { return {}; } @@ -72,27 +81,32 @@ vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { return set; } -UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const { - const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( - {}, static_cast<u32>(bindings.size()), bindings.data()); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); +vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( + vk::Span<VkDescriptorSetLayoutBinding> bindings) const { + VkDescriptorSetLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.bindingCount = bindings.size(); + ci.pBindings = bindings.data(); + return device.GetLogical().CreateDescriptorSetLayout(ci); } -UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0, - nullptr); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); +vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { + VkPipelineLayoutCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.setLayoutCount = 1; + ci.pSetLayouts = descriptor_set_layout.address(); + ci.pushConstantRangeCount = 0; + ci.pPushConstantRanges = nullptr; + return device.GetLogical().CreatePipelineLayout(ci); } -UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate( +vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( const SPIRVProgram& program) const { - std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; + std::vector<VkDescriptorUpdateTemplateEntry> template_entries; u32 binding = 0; u32 offset = 0; for (const auto& stage : program) { @@ -102,168 +116,273 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat } if (template_entries.empty()) { // If the shader doesn't use descriptor sets, skip template creation. - return UniqueDescriptorUpdateTemplate{}; + return {}; } - const vk::DescriptorUpdateTemplateCreateInfo template_ci( - {}, static_cast<u32>(template_entries.size()), template_entries.data(), - vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, - vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); + VkDescriptorUpdateTemplateCreateInfoKHR ci; + ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; + ci.pNext = nullptr; + ci.flags = 0; + ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); + ci.pDescriptorUpdateEntries = template_entries.data(); + ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + ci.descriptorSetLayout = *descriptor_set_layout; + ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + ci.pipelineLayout = *layout; + ci.set = DESCRIPTOR_SET; + return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); } -std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules( +std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( const SPIRVProgram& program) const { - std::vector<UniqueShaderModule> modules; - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); + VkShaderModuleCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + + std::vector<vk::ShaderModule> modules; + modules.reserve(Maxwell::MaxShaderStage); for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { const auto& stage = program[i]; if (!stage) { continue; } - const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32), - stage->code.data()); - modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld)); + + device.SaveShader(stage->code); + + ci.codeSize = stage->code.size() * sizeof(u32); + ci.pCode = stage->code.data(); + modules.push_back(device.GetLogical().CreateShaderModule(ci)); } return modules; } -UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const { +vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, + const SPIRVProgram& program) const { const auto& vi = fixed_state.vertex_input; - const auto& ia = fixed_state.input_assembly; const auto& ds = fixed_state.depth_stencil; const auto& cd = fixed_state.color_blending; - const auto& ts = fixed_state.tessellation; const auto& rs = fixed_state.rasterizer; - std::vector<vk::VertexInputBindingDescription> vertex_bindings; - std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; - for (std::size_t i = 0; i < vi.num_bindings; ++i) { - const auto& binding = vi.bindings[i]; - const bool instanced = binding.divisor != 0; - const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex; - vertex_bindings.emplace_back(binding.index, binding.stride, rate); + std::vector<VkVertexInputBindingDescription> vertex_bindings; + std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; + for (std::size_t index = 0; index < std::size(vi.bindings); ++index) { + const auto& binding = vi.bindings[index]; + if (!binding.enabled) { + continue; + } + const bool instanced = vi.binding_divisors[index] != 0; + const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + + auto& vertex_binding = vertex_bindings.emplace_back(); + vertex_binding.binding = static_cast<u32>(index); + vertex_binding.stride = binding.stride; + vertex_binding.inputRate = rate; + if (instanced) { - vertex_binding_divisors.emplace_back(binding.index, binding.divisor); + auto& binding_divisor = vertex_binding_divisors.emplace_back(); + binding_divisor.binding = static_cast<u32>(index); + binding_divisor.divisor = vi.binding_divisors[index]; } } - std::vector<vk::VertexInputAttributeDescription> vertex_attributes; + std::vector<VkVertexInputAttributeDescription> vertex_attributes; const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t i = 0; i < vi.num_attributes; ++i) { - const auto& attribute = vi.attributes[i]; - if (input_attributes.find(attribute.index) == input_attributes.end()) { + for (std::size_t index = 0; index < std::size(vi.attributes); ++index) { + const auto& attribute = vi.attributes[index]; + if (!attribute.enabled) { + continue; + } + if (input_attributes.find(static_cast<u32>(index)) == input_attributes.end()) { // Skip attributes not used by the vertex shaders. continue; } - vertex_attributes.emplace_back(attribute.index, attribute.buffer, - MaxwellToVK::VertexFormat(attribute.type, attribute.size), - attribute.offset); + auto& vertex_attribute = vertex_attributes.emplace_back(); + vertex_attribute.location = static_cast<u32>(index); + vertex_attribute.binding = attribute.buffer; + vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()); + vertex_attribute.offset = attribute.offset; } - vk::PipelineVertexInputStateCreateInfo vertex_input_ci( - {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(), - static_cast<u32>(vertex_attributes.size()), vertex_attributes.data()); - - const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci( - static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data()); + VkPipelineVertexInputStateCreateInfo vertex_input_ci; + vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_ci.pNext = nullptr; + vertex_input_ci.flags = 0; + vertex_input_ci.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()); + vertex_input_ci.pVertexBindingDescriptions = vertex_bindings.data(); + vertex_input_ci.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()); + vertex_input_ci.pVertexAttributeDescriptions = vertex_attributes.data(); + + VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci; + input_divisor_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; + input_divisor_ci.pNext = nullptr; + input_divisor_ci.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()); + input_divisor_ci.pVertexBindingDivisors = vertex_binding_divisors.data(); if (!vertex_binding_divisors.empty()) { - vertex_input_ci.pNext = &vertex_input_divisor_ci; + vertex_input_ci.pNext = &input_divisor_ci; } - const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); - const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci( - {}, primitive_topology, - ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology)); - - const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points); - - const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr, - Maxwell::NumViewports, nullptr); - - // TODO(Rodrigo): Find out what's the default register value for front face - const vk::PipelineRasterizationStateCreateInfo rasterizer_ci( - {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill, - rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone, - MaxwellToVK::FrontFace(rs.front_face), rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f); - - const vk::PipelineMultisampleStateCreateInfo multisampling_ci( - {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false); - - const vk::CompareOp depth_test_compare = ds.depth_test_enable - ? MaxwellToVK::ComparisonOp(ds.depth_test_function) - : vk::CompareOp::eAlways; - - const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci( - {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable, - ds.stencil_enable, GetStencilFaceState(ds.front_stencil), - GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f); - - std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; - const std::size_t num_attachments = - std::min(cd.attachments_count, renderpass_params.color_attachments.size()); - for (std::size_t i = 0; i < num_attachments; ++i) { - constexpr std::array component_table{ - vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG, - vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA}; - const auto& blend = cd.attachments[i]; - - vk::ColorComponentFlags color_components{}; - for (std::size_t j = 0; j < component_table.size(); ++j) { - if (blend.components[j]) - color_components |= component_table[j]; + VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; + input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_ci.pNext = nullptr; + input_assembly_ci.flags = 0; + input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology()); + input_assembly_ci.primitiveRestartEnable = + rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); + + VkPipelineTessellationStateCreateInfo tessellation_ci; + tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; + tessellation_ci.pNext = nullptr; + tessellation_ci.flags = 0; + tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1; + + VkPipelineViewportStateCreateInfo viewport_ci; + viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_ci.pNext = nullptr; + viewport_ci.flags = 0; + viewport_ci.viewportCount = Maxwell::NumViewports; + viewport_ci.pViewports = nullptr; + viewport_ci.scissorCount = Maxwell::NumViewports; + viewport_ci.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_ci; + rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_ci.pNext = nullptr; + rasterization_ci.flags = 0; + rasterization_ci.depthClampEnable = rs.depth_clamp_enable; + rasterization_ci.rasterizerDiscardEnable = VK_FALSE; + rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_ci.cullMode = + rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE; + rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace()); + rasterization_ci.depthBiasEnable = rs.depth_bias_enable; + rasterization_ci.depthBiasConstantFactor = 0.0f; + rasterization_ci.depthBiasClamp = 0.0f; + rasterization_ci.depthBiasSlopeFactor = 0.0f; + rasterization_ci.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisample_ci; + multisample_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_ci.pNext = nullptr; + multisample_ci.flags = 0; + multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisample_ci.sampleShadingEnable = VK_FALSE; + multisample_ci.minSampleShading = 0.0f; + multisample_ci.pSampleMask = nullptr; + multisample_ci.alphaToCoverageEnable = VK_FALSE; + multisample_ci.alphaToOneEnable = VK_FALSE; + + VkPipelineDepthStencilStateCreateInfo depth_stencil_ci; + depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depth_stencil_ci.pNext = nullptr; + depth_stencil_ci.flags = 0; + depth_stencil_ci.depthTestEnable = ds.depth_test_enable; + depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; + depth_stencil_ci.depthCompareOp = + ds.depth_test_enable ? MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS; + depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; + depth_stencil_ci.stencilTestEnable = ds.stencil_enable; + depth_stencil_ci.front = GetStencilFaceState(ds.front); + depth_stencil_ci.back = GetStencilFaceState(ds.back); + depth_stencil_ci.minDepthBounds = 0.0f; + depth_stencil_ci.maxDepthBounds = 0.0f; + + std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; + const std::size_t num_attachments = renderpass_params.color_attachments.size(); + for (std::size_t index = 0; index < num_attachments; ++index) { + static constexpr std::array COMPONENT_TABLE = { + VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT}; + const auto& blend = cd.attachments[index]; + + VkColorComponentFlags color_components = 0; + for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { + if (blend.Mask()[i]) { + color_components |= COMPONENT_TABLE[i]; + } } - cb_attachments[i] = vk::PipelineColorBlendAttachmentState( - blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func), - MaxwellToVK::BlendFactor(blend.dst_rgb_func), - MaxwellToVK::BlendEquation(blend.rgb_equation), - MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func), - MaxwellToVK::BlendEquation(blend.a_equation), color_components); + VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index]; + attachment.blendEnable = blend.enable != 0; + attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()); + attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()); + attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()); + attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()); + attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()); + attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()); + attachment.colorWriteMask = color_components; } - const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy, - static_cast<u32>(num_attachments), - cb_attachments.data(), {}); - - constexpr std::array dynamic_states = { - vk::DynamicState::eViewport, vk::DynamicState::eScissor, - vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants, - vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask, - vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference}; - const vk::PipelineDynamicStateCreateInfo dynamic_state_ci( - {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data()); - - vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; + + VkPipelineColorBlendStateCreateInfo color_blend_ci; + color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_ci.pNext = nullptr; + color_blend_ci.flags = 0; + color_blend_ci.logicOpEnable = VK_FALSE; + color_blend_ci.logicOp = VK_LOGIC_OP_COPY; + color_blend_ci.attachmentCount = static_cast<u32>(num_attachments); + color_blend_ci.pAttachments = cb_attachments.data(); + std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants)); + + static constexpr std::array dynamic_states = { + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE}; + + VkPipelineDynamicStateCreateInfo dynamic_state_ci; + dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_ci.pNext = nullptr; + dynamic_state_ci.flags = 0; + dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); + dynamic_state_ci.pDynamicStates = dynamic_states.data(); + + VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; + subgroup_size_ci.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; + subgroup_size_ci.pNext = nullptr; subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; - std::vector<vk::PipelineShaderStageCreateInfo> shader_stages; + std::vector<VkPipelineShaderStageCreateInfo> shader_stages; std::size_t module_index = 0; for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { if (!program[stage]) { continue; } - const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage); - const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum); - auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage, - *modules[module_index++], "main", nullptr); - if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) { + VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); + stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage_ci.pNext = nullptr; + stage_ci.flags = 0; + stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); + stage_ci.module = *modules[module_index++]; + stage_ci.pName = "main"; + stage_ci.pSpecializationInfo = nullptr; + + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; } } - const vk::GraphicsPipelineCreateInfo create_info( - {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci, - &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci, - &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld); + VkGraphicsPipelineCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.stageCount = static_cast<u32>(shader_stages.size()); + ci.pStages = shader_stages.data(); + ci.pVertexInputState = &vertex_input_ci; + ci.pInputAssemblyState = &input_assembly_ci; + ci.pTessellationState = &tessellation_ci; + ci.pViewportState = &viewport_ci; + ci.pRasterizationState = &rasterization_ci; + ci.pMultisampleState = &multisample_ci; + ci.pDepthStencilState = &depth_stencil_ci; + ci.pColorBlendState = &color_blend_ci; + ci.pDynamicState = &dynamic_state_ci; + ci.layout = *layout; + ci.renderPass = renderpass; + ci.subpass = 0; + ci.basePipelineHandle = nullptr; + ci.basePipelineIndex = 0; + return device.GetLogical().CreateGraphicsPipeline(ci); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4f5e4ea2d..7aba70960 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -11,12 +11,12 @@ #include <vector> #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -39,52 +39,52 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache, const GraphicsPipelineCacheKey& key, - const std::vector<vk::DescriptorSetLayoutBinding>& bindings, + vk::Span<VkDescriptorSetLayoutBinding> bindings, const SPIRVProgram& program); ~VKGraphicsPipeline(); - vk::DescriptorSet CommitDescriptorSet(); + VkDescriptorSet CommitDescriptorSet(); - vk::Pipeline GetHandle() const { + VkPipeline GetHandle() const { return *pipeline; } - vk::PipelineLayout GetLayout() const { + VkPipelineLayout GetLayout() const { return *layout; } - vk::RenderPass GetRenderPass() const { + VkRenderPass GetRenderPass() const { return renderpass; } private: - UniqueDescriptorSetLayout CreateDescriptorSetLayout( - const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const; + vk::DescriptorSetLayout CreateDescriptorSetLayout( + vk::Span<VkDescriptorSetLayoutBinding> bindings) const; - UniquePipelineLayout CreatePipelineLayout() const; + vk::PipelineLayout CreatePipelineLayout() const; - UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate( + vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( const SPIRVProgram& program) const; - std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const; + std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; - UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params, - const SPIRVProgram& program) const; + vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, + const SPIRVProgram& program) const; const VKDevice& device; VKScheduler& scheduler; const FixedPipelineState fixed_state; const u64 hash; - UniqueDescriptorSetLayout descriptor_set_layout; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; VKUpdateDescriptorQueue& update_descriptor_queue; - UniquePipelineLayout layout; - UniqueDescriptorUpdateTemplate descriptor_template; - std::vector<UniqueShaderModule> modules; + vk::PipelineLayout layout; + vk::DescriptorUpdateTemplateKHR descriptor_template; + std::vector<vk::ShaderModule> modules; - vk::RenderPass renderpass; - UniquePipeline pipeline; + VkRenderPass renderpass; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 4bcbef959..9bceb3861 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -6,22 +6,21 @@ #include <vector> #include "common/assert.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { -VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, - const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask) +VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, const VkImageCreateInfo& image_ci, + VkImageAspectFlags aspect_mask) : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask}, image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} { UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0, "Queue family tracking is not implemented"); - const auto dev = device.GetLogical(); - image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader()); + image = device.GetLogical().CreateImage(image_ci); const u32 num_ranges = image_num_layers * image_num_levels; barriers.resize(num_ranges); @@ -31,8 +30,8 @@ VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, VKImage::~VKImage() = default; void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, - vk::ImageLayout new_layout) { + VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, + VkImageLayout new_layout) { if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { return; } @@ -43,9 +42,21 @@ void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num const u32 layer = base_layer + layer_it; const u32 level = base_level + level_it; auto& state = GetSubrangeState(layer, level); - barriers[cursor] = vk::ImageMemoryBarrier( - state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1}); + auto& barrier = barriers[cursor]; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = state.access; + barrier.dstAccessMask = new_access; + barrier.oldLayout = state.layout; + barrier.newLayout = new_layout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = *image; + barrier.subresourceRange.aspectMask = aspect_mask; + barrier.subresourceRange.baseMipLevel = level; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = layer; + barrier.subresourceRange.layerCount = 1; state.access = new_access; state.layout = new_layout; } @@ -53,16 +64,16 @@ void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) { + scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { // TODO(Rodrigo): Implement a way to use the latest stage across subresources. - constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands; - cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr, - static_cast<u32>(cursor), barriers.data(), dld); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, + vk::Span(barriers.data(), cursor)); }); } bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept { + VkAccessFlags new_access, VkImageLayout new_layout) noexcept { const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && base_level == 0 && num_levels == image_num_levels; if (!is_full_range) { @@ -91,11 +102,21 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num void VKImage::CreatePresentView() { // Image type has to be 2D to be presented. - const vk::ImageViewCreateInfo image_view_ci({}, *image, vk::ImageViewType::e2D, format, {}, - {aspect_mask, 0, 1, 0, 1}); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - present_view = dev.createImageViewUnique(image_view_ci, nullptr, dld); + VkImageViewCreateInfo image_view_ci; + image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + image_view_ci.pNext = nullptr; + image_view_ci.flags = 0; + image_view_ci.image = *image; + image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + image_view_ci.format = format; + image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + image_view_ci.subresourceRange.aspectMask = aspect_mask; + image_view_ci.subresourceRange.baseMipLevel = 0; + image_view_ci.subresourceRange.levelCount = 1; + image_view_ci.subresourceRange.baseArrayLayer = 0; + image_view_ci.subresourceRange.layerCount = 1; + present_view = device.GetLogical().CreateImageView(image_view_ci); } VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h index b78242512..b4d7229e5 100644 --- a/src/video_core/renderer_vulkan/vk_image.h +++ b/src/video_core/renderer_vulkan/vk_image.h @@ -8,7 +8,7 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -18,16 +18,16 @@ class VKScheduler; class VKImage { public: explicit VKImage(const VKDevice& device, VKScheduler& scheduler, - const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask); + const VkImageCreateInfo& image_ci, VkImageAspectFlags aspect_mask); ~VKImage(); /// Records in the passed command buffer an image transition and updates the state of the image. void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, - vk::ImageLayout new_layout); + VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, + VkImageLayout new_layout); /// Returns a view compatible with presentation, the image has to be 2D. - vk::ImageView GetPresentView() { + VkImageView GetPresentView() { if (!present_view) { CreatePresentView(); } @@ -35,28 +35,28 @@ public: } /// Returns the Vulkan image handler. - vk::Image GetHandle() const { - return *image; + const vk::Image& GetHandle() const { + return image; } /// Returns the Vulkan format for this image. - vk::Format GetFormat() const { + VkFormat GetFormat() const { return format; } /// Returns the Vulkan aspect mask. - vk::ImageAspectFlags GetAspectMask() const { + VkImageAspectFlags GetAspectMask() const { return aspect_mask; } private: struct SubrangeState final { - vk::AccessFlags access{}; ///< Current access bits. - vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout. + VkAccessFlags access = 0; ///< Current access bits. + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. }; bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept; + VkAccessFlags new_access, VkImageLayout new_layout) noexcept; /// Creates a presentation view. void CreatePresentView(); @@ -67,16 +67,16 @@ private: const VKDevice& device; ///< Device handler. VKScheduler& scheduler; ///< Device scheduler. - const vk::Format format; ///< Vulkan format. - const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask. - const u32 image_num_layers; ///< Number of layers. - const u32 image_num_levels; ///< Number of mipmap levels. + const VkFormat format; ///< Vulkan format. + const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. + const u32 image_num_layers; ///< Number of layers. + const u32 image_num_levels; ///< Number of mipmap levels. - UniqueImage image; ///< Image handle. - UniqueImageView present_view; ///< Image view compatible with presentation. + vk::Image image; ///< Image handle. + vk::ImageView present_view; ///< Image view compatible with presentation. - std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers. - std::vector<SubrangeState> subrange_states; ///< Current subrange state. + std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers. + std::vector<SubrangeState> subrange_states; ///< Current subrange state. bool state_diverged = false; ///< True when subresources mismatch in layout. }; diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 9cc9979d0..6a9e658bf 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -11,9 +11,9 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -30,17 +30,11 @@ u64 GetAllocationChunkSize(u64 required_size) { class VKMemoryAllocation final { public: explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, - vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) - : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, - shifted_type{ShiftType(type)} {} - - ~VKMemoryAllocation() { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - dev.free(memory, nullptr, dld); - } + VkMemoryPropertyFlags properties, u64 allocation_size, u32 type) + : device{device}, memory{std::move(memory)}, properties{properties}, + allocation_size{allocation_size}, shifted_type{ShiftType(type)} {} - VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { + VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) { auto found = TryFindFreeSection(free_iterator, allocation_size, static_cast<u64>(commit_size), static_cast<u64>(alignment)); if (!found) { @@ -73,9 +67,8 @@ public: } /// Returns whether this allocation is compatible with the arguments. - bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const { - return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) && - (type_mask & shifted_type) != 0; + bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const { + return (wanted_properties & properties) && (type_mask & shifted_type) != 0; } private: @@ -111,11 +104,11 @@ private: return std::nullopt; } - const VKDevice& device; ///< Vulkan device. - const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. - const vk::MemoryPropertyFlags properties; ///< Vulkan properties. - const u64 allocation_size; ///< Size of this allocation. - const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. + const VKDevice& device; ///< Vulkan device. + const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. + const VkMemoryPropertyFlags properties; ///< Vulkan properties. + const u64 allocation_size; ///< Size of this allocation. + const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. /// Hints where the next free region is likely going to be. u64 free_iterator{}; @@ -125,22 +118,20 @@ private: }; VKMemoryManager::VKMemoryManager(const VKDevice& device) - : device{device}, properties{device.GetPhysical().getMemoryProperties( - device.GetDispatchLoader())}, + : device{device}, properties{device.GetPhysical().GetMemoryProperties()}, is_memory_unified{GetMemoryUnified(properties)} {} VKMemoryManager::~VKMemoryManager() = default; -VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, +VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements, bool host_visible) { const u64 chunk_size = GetAllocationChunkSize(requirements.size); // When a host visible commit is asked, search for host visible and coherent, otherwise search // for a fast device local type. - const vk::MemoryPropertyFlags wanted_properties = - host_visible - ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent - : vk::MemoryPropertyFlagBits::eDeviceLocal; + const VkMemoryPropertyFlags wanted_properties = + host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; if (auto commit = TryAllocCommit(requirements, wanted_properties)) { return commit; @@ -161,23 +152,19 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirement return commit; } -VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible); - dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); +VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) { + auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible); + buffer.BindMemory(commit->GetMemory(), commit->GetOffset()); return commit; } -VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible); - dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); +VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) { + auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible); + image.BindMemory(commit->GetMemory(), commit->GetOffset()); return commit; } -bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, +bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size) { const u32 type = [&] { for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { @@ -191,24 +178,26 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 return 0U; }(); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - // Try to allocate found type. - const vk::MemoryAllocateInfo memory_ai(size, type); - vk::DeviceMemory memory; - if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); - res != vk::Result::eSuccess) { - LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); + VkMemoryAllocateInfo memory_ai; + memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_ai.pNext = nullptr; + memory_ai.allocationSize = size; + memory_ai.memoryTypeIndex = type; + + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai); + if (!memory) { + LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); return false; } - allocations.push_back( - std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); + + allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory), + wanted_properties, size, type)); return true; } -VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, - vk::MemoryPropertyFlags wanted_properties) { +VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags wanted_properties) { for (auto& allocation : allocations) { if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { continue; @@ -220,10 +209,9 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& req return {}; } -/*static*/ bool VKMemoryManager::GetMemoryUnified( - const vk::PhysicalDeviceMemoryProperties& properties) { +bool VKMemoryManager::GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties) { for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { - if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { + if (!(properties.memoryHeaps[heap_index].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)) { // Memory is considered unified when heaps are device local only. return false; } @@ -232,23 +220,19 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& req } VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, - vk::DeviceMemory memory, u64 begin, u64 end) - : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} + const vk::DeviceMemory& memory, u64 begin, u64 end) + : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); } MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { - const auto dev = device.GetLogical(); - const auto address = reinterpret_cast<u8*>( - dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader())); - return MemoryMap{this, address}; + return MemoryMap{this, memory.Map(interval.first + offset_, size)}; } void VKMemoryCommitImpl::Unmap() const { - const auto dev = device.GetLogical(); - dev.unmapMemory(memory, device.GetDispatchLoader()); + memory.Unmap(); } MemoryMap VKMemoryCommitImpl::Map() const { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index cd00bb91b..5b6858e9b 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -8,7 +8,7 @@ #include <utility> #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -32,13 +32,13 @@ public: * memory. When passing false, it will try to allocate device local memory. * @returns A memory commit. */ - VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); + VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible); /// Commits memory required by the buffer and binds it. - VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible); + VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible); /// Commits memory required by the image and binds it. - VKMemoryCommit Commit(vk::Image image, bool host_visible); + VKMemoryCommit Commit(const vk::Image& image, bool host_visible); /// Returns true if the memory allocations are done always in host visible and coherent memory. bool IsMemoryUnified() const { @@ -47,18 +47,18 @@ public: private: /// Allocates a chunk of memory. - bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); + bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); /// Tries to allocate a memory commit. - VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, - vk::MemoryPropertyFlags wanted_properties); + VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags wanted_properties); /// Returns true if the device uses an unified memory model. - static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); + static bool GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties); - const VKDevice& device; ///< Device handler. - const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. - const bool is_memory_unified; ///< True if memory model is unified. + const VKDevice& device; ///< Device handler. + const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. + const bool is_memory_unified; ///< True if memory model is unified. std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. }; @@ -68,7 +68,7 @@ class VKMemoryCommitImpl final { public: explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, - vk::DeviceMemory memory, u64 begin, u64 end); + const vk::DeviceMemory& memory, u64 begin, u64 end); ~VKMemoryCommitImpl(); /// Maps a memory region and returns a pointer to it. @@ -80,13 +80,13 @@ public: MemoryMap Map() const; /// Returns the Vulkan memory handler. - vk::DeviceMemory GetMemory() const { - return memory; + VkDeviceMemory GetMemory() const { + return *memory; } /// Returns the start position of the commit relative to the allocation. - vk::DeviceSize GetOffset() const { - return static_cast<vk::DeviceSize>(interval.first); + VkDeviceSize GetOffset() const { + return static_cast<VkDeviceSize>(interval.first); } private: @@ -94,8 +94,8 @@ private: void Unmap() const; const VKDevice& device; ///< Vulkan device. + const vk::DeviceMemory& memory; ///< Vulkan device memory handler. std::pair<u64, u64> interval{}; ///< Interval where the commit exists. - vk::DeviceMemory memory; ///< Vulkan device memory handler. VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a426aeb..8fdc6400d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -13,7 +13,6 @@ #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" @@ -26,6 +25,7 @@ #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/compiler_settings.h" namespace Vulkan { @@ -36,12 +36,11 @@ using Tegra::Engines::ShaderType; namespace { -// C++20's using enum -constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer; -constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer; -constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer; -constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler; -constexpr auto eStorageImage = vk::DescriptorType::eStorageImage; +constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; +constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; +constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; +constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; +constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ VideoCommon::Shader::CompileDepth::FullDecompile}; @@ -126,32 +125,37 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { } } -template <vk::DescriptorType descriptor_type, class Container> -void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding, - vk::ShaderStageFlags stage_flags, const Container& container) { +template <VkDescriptorType descriptor_type, class Container> +void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, + VkShaderStageFlags stage_flags, const Container& container) { const u32 num_entries = static_cast<u32>(std::size(container)); for (std::size_t i = 0; i < num_entries; ++i) { u32 count = 1; - if constexpr (descriptor_type == eCombinedImageSampler) { + if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { // Combined image samplers can be arrayed. count = container[i].Size(); } - bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr); + VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); + entry.binding = binding++; + entry.descriptorType = descriptor_type; + entry.descriptorCount = count; + entry.stageFlags = stage_flags; + entry.pImmutableSamplers = nullptr; } } u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector<vk::DescriptorSetLayoutBinding>& bindings, + std::vector<VkDescriptorSetLayoutBinding>& bindings, Maxwell::ShaderProgram program_type, u32 base_binding) { const ShaderType stage = GetStageFromProgram(program_type); - const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); + const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); u32 binding = base_binding; - AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers); - AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers); - AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers); - AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers); - AddBindings<eStorageImage>(bindings, binding, flags, entries.images); + AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); + AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); + AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); + AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); + AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); return binding; } @@ -318,24 +322,24 @@ void VKPipelineCache::Unregister(const Shader& shader) { RasterizerCache::Unregister(shader); } -std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> +std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto& fixed_state = key.fixed_state; auto& memory_manager = system.GPU().MemoryManager(); const auto& gpu = system.GPU().Maxwell3D(); Specialization specialization; - if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { - ASSERT(fixed_state.input_assembly.point_size != 0.0f); - specialization.point_size = fixed_state.input_assembly.point_size; + if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) { + ASSERT(fixed_state.rasterizer.point_size != 0); + std::memcpy(&specialization.point_size, &fixed_state.rasterizer.point_size, sizeof(u32)); } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; + specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); } specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; SPIRVProgram program; - std::vector<vk::DescriptorSetLayoutBinding> bindings; + std::vector<VkDescriptorSetLayoutBinding> bindings; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); @@ -371,32 +375,49 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { return {std::move(program), std::move(bindings)}; } -template <vk::DescriptorType descriptor_type, class Container> -void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding, +template <VkDescriptorType descriptor_type, class Container> +void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, u32& offset, const Container& container) { static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); const u32 count = static_cast<u32>(std::size(container)); - if constexpr (descriptor_type == eCombinedImageSampler) { + if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { for (u32 i = 0; i < count; ++i) { const u32 num_samplers = container[i].Size(); - template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset, - entry_size); + VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); + entry.dstBinding = binding; + entry.dstArrayElement = 0; + entry.descriptorCount = num_samplers; + entry.descriptorType = descriptor_type; + entry.offset = offset; + entry.stride = entry_size; + ++binding; offset += num_samplers * entry_size; } return; } - if constexpr (descriptor_type == eUniformTexelBuffer) { + if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { // Nvidia has a bug where updating multiple uniform texels at once causes the driver to // crash. for (u32 i = 0; i < count; ++i) { - template_entries.emplace_back(binding + i, 0, 1, descriptor_type, - offset + i * entry_size, entry_size); + VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); + entry.dstBinding = binding + i; + entry.dstArrayElement = 0; + entry.descriptorCount = 1; + entry.descriptorType = descriptor_type; + entry.offset = offset + i * entry_size; + entry.stride = entry_size; } } else if (count > 0) { - template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); + VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); + entry.dstBinding = binding; + entry.dstArrayElement = 0; + entry.descriptorCount = count; + entry.descriptorType = descriptor_type; + entry.offset = offset; + entry.stride = entry_size; } offset += count * entry_size; binding += count; @@ -404,12 +425,12 @@ void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, void FillDescriptorUpdateTemplateEntries( const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { - AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers); - AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers); - AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers); - AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers); - AddEntry<eStorageImage>(template_entries, offset, binding, entries.images); + std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { + AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); + AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); + AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); + AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); + AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 27c01732f..7ccdb7083 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,12 +19,12 @@ #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/surface.h" @@ -172,7 +172,7 @@ protected: void FlushObjectInner(const Shader& object) override {} private: - std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders( + std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( const GraphicsPipelineCacheKey& key); Core::System& system; @@ -194,6 +194,6 @@ private: void FillDescriptorUpdateTemplateEntries( const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); + std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ffbf60dda..813f7c162 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -8,19 +8,19 @@ #include <utility> #include <vector> -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { namespace { -constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; +constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION}; -constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { +constexpr VkQueryType GetTarget(VideoCore::QueryType type) { return QUERY_TARGETS[static_cast<std::size_t>(type)]; } @@ -35,29 +35,34 @@ void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) type = type_; } -std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { +std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { std::size_t index; do { index = CommitResource(fence); } while (usage[index]); usage[index] = true; - return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; + return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)}; } void QueryPool::Allocate(std::size_t begin, std::size_t end) { usage.resize(end); - const auto dev = device->GetLogical(); - const u32 size = static_cast<u32>(end - begin); - const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); - pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); + VkQueryPoolCreateInfo query_pool_ci; + query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + query_pool_ci.pNext = nullptr; + query_pool_ci.flags = 0; + query_pool_ci.queryType = GetTarget(type); + query_pool_ci.queryCount = static_cast<u32>(end - begin); + query_pool_ci.pipelineStatistics = 0; + pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci)); } -void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { +void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { const auto it = - std::find_if(std::begin(pools), std::end(pools), - [query_pool = query.first](auto& pool) { return query_pool == *pool; }); + std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) { + return query_pool == *pool; + }); ASSERT(it != std::end(pools)); const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); @@ -76,12 +81,11 @@ VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& VKQueryCache::~VKQueryCache() = default; -std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { +std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); } -void VKQueryCache::Reserve(VideoCore::QueryType type, - std::pair<vk::QueryPool, std::uint32_t> query) { +void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) { query_pools[static_cast<std::size_t>(type)].Reserve(query); } @@ -89,10 +93,10 @@ HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> depen VideoCore::QueryType type) : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { - const auto dev = cache.Device().GetLogical(); - cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { - dev.resetQueryPoolEXT(query.first, query.second, 1, dld); - cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); + const vk::Device* logical = &cache.Device().GetLogical(); + cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { + logical->ResetQueryPoolEXT(query.first, query.second, 1); + cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); }); } @@ -101,22 +105,27 @@ HostCounter::~HostCounter() { } void HostCounter::EndQuery() { - cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { - cmdbuf.endQuery(query.first, query.second, dld); - }); + cache.Scheduler().Record( + [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); } u64 HostCounter::BlockingQuery() const { if (ticks >= cache.Scheduler().Ticks()) { cache.Scheduler().Flush(); } - - const auto dev = cache.Device().GetLogical(); - const auto& dld = cache.Device().GetDispatchLoader(); - u64 value; - dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), - vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); - return value; + u64 data; + const VkResult result = cache.Device().GetLogical().GetQueryResults( + query.first, query.second, 1, sizeof(data), &data, sizeof(data), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + switch (result) { + case VK_SUCCESS: + return data; + case VK_ERROR_DEVICE_LOST: + cache.Device().ReportLoss(); + [[fallthrough]]; + default: + throw vk::Exception(result); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index c3092ee96..b63784f4b 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -12,8 +12,8 @@ #include "common/common_types.h" #include "video_core/query_cache.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace VideoCore { class RasterizerInterface; @@ -36,9 +36,9 @@ public: void Initialize(const VKDevice& device, VideoCore::QueryType type); - std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); + std::pair<VkQueryPool, u32> Commit(VKFence& fence); - void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); + void Reserve(std::pair<VkQueryPool, u32> query); protected: void Allocate(std::size_t begin, std::size_t end) override; @@ -49,7 +49,7 @@ private: const VKDevice* device = nullptr; VideoCore::QueryType type = {}; - std::vector<UniqueQueryPool> pools; + std::vector<vk::QueryPool> pools; std::vector<bool> usage; }; @@ -61,9 +61,9 @@ public: const VKDevice& device, VKScheduler& scheduler); ~VKQueryCache(); - std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); + std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); - void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); + void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); const VKDevice& Device() const noexcept { return device; @@ -91,7 +91,7 @@ private: VKQueryCache& cache; const VideoCore::QueryType type; - const std::pair<vk::QueryPool, std::uint32_t> query; + const std::pair<VkQueryPool, u32> query; const u64 ticks; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6b99cbbbc..b58a88664 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -19,7 +19,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -39,6 +38,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -60,32 +60,42 @@ namespace { constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); -vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { - const auto& viewport = regs.viewport_transform[index]; - const float x = viewport.translate_x - viewport.scale_x; - const float y = viewport.translate_y - viewport.scale_y; - const float width = viewport.scale_x * 2.0f; - const float height = viewport.scale_y * 2.0f; +VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { + const auto& src = regs.viewport_transform[index]; + const float width = src.scale_x * 2.0f; + const float height = src.scale_y * 2.0f; - const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; - float near = viewport.translate_z - viewport.scale_z * reduce_z; - float far = viewport.translate_z + viewport.scale_z; + VkViewport viewport; + viewport.x = src.translate_x - src.scale_x; + viewport.y = src.translate_y - src.scale_y; + viewport.width = width != 0.0f ? width : 1.0f; + viewport.height = height != 0.0f ? height : 1.0f; + + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; + viewport.minDepth = src.translate_z - src.scale_z * reduce_z; + viewport.maxDepth = src.translate_z + src.scale_z; if (!device.IsExtDepthRangeUnrestrictedSupported()) { - near = std::clamp(near, 0.0f, 1.0f); - far = std::clamp(far, 0.0f, 1.0f); + viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); + viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); } - - return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far); + return viewport; } -constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { - const auto& scissor = regs.scissor_test[index]; - if (!scissor.enable) { - return {{0, 0}, {INT32_MAX, INT32_MAX}}; +VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { + const auto& src = regs.scissor_test[index]; + VkRect2D scissor; + if (src.enable) { + scissor.offset.x = static_cast<s32>(src.min_x); + scissor.offset.y = static_cast<s32>(src.min_y); + scissor.extent.width = src.max_x - src.min_x; + scissor.extent.height = src.max_y - src.min_y; + } else { + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent.width = std::numeric_limits<s32>::max(); + scissor.extent.height = std::numeric_limits<s32>::max(); } - const u32 width = scissor.max_x - scissor.min_x; - const u32 height = scissor.max_y - scissor.min_y; - return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}}; + return scissor; } std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( @@ -97,8 +107,8 @@ std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( return addresses; } -void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage, - vk::AccessFlags access) { +void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, + VkAccessFlags access) { for (auto& [view, layout] : views) { view->Transition(*layout, pipeline_stage, access); } @@ -127,13 +137,13 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry class BufferBindings final { public: - void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { - vertex.buffer_ptrs[vertex.num_buffers] = buffer; + void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) { + vertex.buffers[vertex.num_buffers] = buffer; vertex.offsets[vertex.num_buffers] = offset; ++vertex.num_buffers; } - void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { + void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { index.buffer = buffer; index.offset = offset; index.type = type; @@ -217,19 +227,19 @@ private: // Some of these fields are intentionally left uninitialized to avoid initializing them twice. struct { std::size_t num_buffers = 0; - std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs; - std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets; + std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; + std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; } vertex; struct { - const vk::Buffer* buffer = nullptr; - vk::DeviceSize offset; - vk::IndexType type; + VkBuffer buffer = nullptr; + VkDeviceSize offset; + VkIndexType type; } index; template <std::size_t N> void BindStatic(VKScheduler& scheduler) const { - if (index.buffer != nullptr) { + if (index.buffer) { BindStatic<N, true>(scheduler); } else { BindStatic<N, false>(scheduler); @@ -243,38 +253,31 @@ private: return; } - std::array<vk::Buffer, N> buffers; - std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), - [](const auto ptr) { return *ptr; }); - - std::array<vk::DeviceSize, N> offsets; + std::array<VkBuffer, N> buffers; + std::array<VkDeviceSize, N> offsets; + std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); if constexpr (is_indexed) { // Indexed draw - scheduler.Record([buffers, offsets, index_buffer = *index.buffer, - index_offset = index.offset, - index_type = index.type](auto cmdbuf, auto& dld) { - cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); - cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), - dld); + scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); + cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); }); } else { // Array draw - scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { - cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), - dld); + scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); }); } } }; -void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, - const vk::DispatchLoaderDynamic& dld) const { +void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { if (is_indexed) { - cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); + cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance); } else { - cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld); + cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance); } } @@ -289,6 +292,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind staging_pool(device, memory_manager, scheduler), descriptor_pool(device), update_descriptor_queue(device, scheduler), renderpass_cache(device), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), + quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, staging_pool), @@ -337,27 +341,22 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const auto renderpass = pipeline.GetRenderPass(); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); + scheduler.RequestRenderpass(renderpass, framebuffer, render_area); UpdateDynamicStates(); buffer_bindings.Bind(scheduler); - if (device.IsNvDeviceDiagnosticCheckpoints()) { - scheduler.Record( - [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); - } - BeginTransformFeedback(); const auto pipeline_layout = pipeline.GetLayout(); const auto descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { + scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { if (descriptor_set) { - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, - DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, + DESCRIPTOR_SET, descriptor_set, {}); } - draw_params.Draw(cmdbuf, dld); + draw_params.Draw(cmdbuf); }); EndTransformFeedback(); @@ -389,48 +388,54 @@ void RasterizerVulkan::Clear() { DEBUG_ASSERT(texceptions.none()); SetupImageTransitions(0, color_attachments, zeta_attachment); - const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); + const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); - scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); + scheduler.RequestRenderpass(renderpass, framebuffer, render_area); - const auto& scissor = regs.scissor_test[0]; - const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y); - vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y}; - scissor_extent.width = std::min(scissor_extent.width, render_area.width); - scissor_extent.height = std::min(scissor_extent.height, render_area.height); - - const u32 layer = regs.clear_buffers.layer; - const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); + VkClearRect clear_rect; + clear_rect.baseArrayLayer = regs.clear_buffers.layer; + clear_rect.layerCount = 1; + clear_rect.rect = GetScissorState(regs, 0); + clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); + clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); if (use_color) { - const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], - regs.clear_color[2], regs.clear_color[3]}; - const vk::ClearValue clear_value{clear_color}; + VkClearValue clear_value; + std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); + const u32 color_attachment = regs.clear_buffers.RT; - scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { - const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, - clear_value); - cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); + scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { + VkClearAttachment attachment; + attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + attachment.colorAttachment = color_attachment; + attachment.clearValue = clear_value; + cmdbuf.ClearAttachments(attachment, clear_rect); }); } if (!use_depth && !use_stencil) { return; } - vk::ImageAspectFlags aspect_flags; + VkImageAspectFlags aspect_flags = 0; if (use_depth) { - aspect_flags |= vk::ImageAspectFlagBits::eDepth; + aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT; } if (use_stencil) { - aspect_flags |= vk::ImageAspectFlagBits::eStencil; + aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; } scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, - clear_rect, aspect_flags](auto cmdbuf, auto& dld) { - const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil); - const vk::ClearValue clear_value{clear_zeta}; - const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); - cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); + clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { + VkClearValue clear_value; + clear_value.depthStencil.depth = clear_depth; + clear_value.depthStencil.stencil = clear_stencil; + + VkClearAttachment attachment; + attachment.aspectMask = aspect_flags; + attachment.colorAttachment = 0; + attachment.clearValue.depthStencil.depth = clear_depth; + attachment.clearValue.depthStencil.stencil = clear_stencil; + cmdbuf.ClearAttachments(attachment, clear_rect); }); } @@ -463,24 +468,19 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); - TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader, - vk::AccessFlagBits::eShaderRead); - TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); - - if (device.IsNvDeviceDiagnosticCheckpoints()) { - scheduler.Record( - [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(nullptr, dld); }); - } + TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT); + TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), layout = pipeline.GetLayout(), - descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) { - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1, - &descriptor_set, 0, nullptr, dld); - cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); + descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, + descriptor_set, {}); + cmdbuf.Dispatch(grid_x, grid_y, grid_z); }); } @@ -625,13 +625,13 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen continue; } overlap = true; - *layout = vk::ImageLayout::eGeneral; + *layout = VK_IMAGE_LAYOUT_GENERAL; } return overlap; } -std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( - vk::RenderPass renderpass) { +std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( + VkRenderPass renderpass) { FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; @@ -658,15 +658,20 @@ std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffer const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - const vk::FramebufferCreateInfo framebuffer_ci( - {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width, - key.height, key.layers); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); - } - - return {*framebuffer, vk::Extent2D{key.width, key.height}}; + VkFramebufferCreateInfo framebuffer_ci; + framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_ci.pNext = nullptr; + framebuffer_ci.flags = 0; + framebuffer_ci.renderPass = key.renderpass; + framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size()); + framebuffer_ci.pAttachments = key.views.data(); + framebuffer_ci.width = key.width; + framebuffer_ci.height = key.height; + framebuffer_ci.layers = key.layers; + framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci); + } + + return {*framebuffer, VkExtent2D{key.width, key.height}}; } RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, @@ -714,10 +719,9 @@ void RasterizerVulkan::SetupShaderDescriptors( void RasterizerVulkan::SetupImageTransitions( Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, const View& zeta_attachment) { - TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, - vk::AccessFlagBits::eShaderRead); - TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); + TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { const auto color_attachment = color_attachments[rt]; @@ -725,19 +729,19 @@ void RasterizerVulkan::SetupImageTransitions( continue; } const auto image_layout = - texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; - color_attachment->Transition( - image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); + texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); } if (zeta_attachment != nullptr) { const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] - ? vk::ImageLayout::eGeneral - : vk::ImageLayout::eDepthStencilAttachmentOptimal; - zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, - vk::AccessFlagBits::eDepthStencilAttachmentRead | - vk::AccessFlagBits::eDepthStencilAttachmentWrite); + ? VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + zeta_attachment->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); } } @@ -773,9 +777,9 @@ void RasterizerVulkan::BeginTransformFeedback() { const std::size_t size = binding.buffer_size; const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { - cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); - cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); + scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); + cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } @@ -786,32 +790,36 @@ void RasterizerVulkan::EndTransformFeedback() { } scheduler.Record( - [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); + [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, BufferBindings& buffer_bindings) { const auto& regs = system.GPU().Maxwell3D().regs; - for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) { + for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { const auto& attrib = regs.vertex_attrib_format[index]; if (!attrib.IsValid()) { + vertex_input.SetAttribute(index, false, 0, 0, {}, {}); continue; } - const auto& buffer = regs.vertex_array[attrib.buffer]; + [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; ASSERT(buffer.IsEnabled()); - vertex_input.attributes[vertex_input.num_attributes++] = - FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, - attrib.offset); + vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), + attrib.size.Value()); } - for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) { + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; if (!vertex_array.IsEnabled()) { + vertex_input.SetBinding(index, false, 0, 0); continue; } + vertex_input.SetBinding( + index, true, vertex_array.stride, + regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); const GPUVAddr start{vertex_array.StartAddress()}; const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; @@ -819,10 +827,6 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex ASSERT(end > start); const std::size_t size{end - start + 1}; const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); - - vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding( - index, vertex_array.stride, - regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); buffer_bindings.AddVertexBinding(buffer, offset); } } @@ -831,18 +835,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar bool is_indexed) { const auto& regs = system.GPU().Maxwell3D().regs; switch (regs.draw.topology) { - case Maxwell::PrimitiveTopology::Quads: - if (params.is_indexed) { - UNIMPLEMENTED(); - } else { + case Maxwell::PrimitiveTopology::Quads: { + if (!params.is_indexed) { const auto [buffer, offset] = quad_array_pass.Assemble(params.num_vertices, params.base_vertex); - buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); + buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); params.base_vertex = 0; params.num_vertices = params.num_vertices * 6 / 4; params.is_indexed = true; + break; } + const GPUVAddr gpu_addr = regs.index_array.IndexStart(); + auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + std::tie(buffer, offset) = quad_indexed_pass.Assemble( + regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); + + buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); + params.num_vertices = (params.num_vertices / 4) * 6; + params.base_vertex = 0; break; + } default: { if (!is_indexed) { break; @@ -853,7 +865,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar auto format = regs.index_array.format; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { - std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); + std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset); format = Maxwell::IndexFormat::UnsignedShort; } @@ -990,8 +1002,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd const auto size = memory_manager.Read<u32>(address + 8); if (size == 0) { - // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because - // Vulkan doesn't like empty buffers. + // Sometimes global memory pointers don't have a proper size. Upload a dummy entry + // because Vulkan doesn't like empty buffers. constexpr std::size_t dummy_size = 4; const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); @@ -1022,7 +1034,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu update_descriptor_queue.AddSampledImage(sampler, image_view); const auto image_layout = update_descriptor_queue.GetLastImageLayout(); - *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; + *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; sampled_views.push_back(ImageView{std::move(view), image_layout}); } @@ -1039,7 +1051,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima update_descriptor_queue.AddImage(image_view); const auto image_layout = update_descriptor_queue.GetLastImageLayout(); - *image_layout = vk::ImageLayout::eGeneral; + *image_layout = VK_IMAGE_LAYOUT_GENERAL; image_views.push_back(ImageView{std::move(view), image_layout}); } @@ -1056,9 +1068,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; - scheduler.Record([viewports](auto cmdbuf, auto& dld) { - cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld); - }); + scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); } void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1072,9 +1082,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), GetScissorState(regs, 15)}; - scheduler.Record([scissors](auto cmdbuf, auto& dld) { - cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld); - }); + scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); } void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1082,8 +1090,8 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { return; } scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, - factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { - cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); + factor = regs.polygon_offset_factor](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthBias(constant, clamp, factor / 2.0f); }); } @@ -1093,9 +1101,8 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg } const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a}; - scheduler.Record([blend_color](auto cmdbuf, auto& dld) { - cmdbuf.setBlendConstants(blend_color.data(), dld); - }); + scheduler.Record( + [blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); }); } void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1103,7 +1110,7 @@ void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) return; } scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( - auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); + vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); }); } void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1116,24 +1123,24 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, back_write_mask = regs.stencil_back_mask, - back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { + back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { // Front face - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); + cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref); + cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask); + cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask); // Back face - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); + cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref); + cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask); + cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask); }); } else { // Front face defines both faces scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, - test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); + test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref); + cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask); + cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask); }); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index f642dde76..d9108f862 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -17,7 +17,6 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -32,6 +31,7 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; @@ -49,11 +49,10 @@ namespace Vulkan { struct VKScreenInfo; -using ImageViewsPack = - boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>; +using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>; struct FramebufferCacheKey { - vk::RenderPass renderpass{}; + VkRenderPass renderpass{}; u32 width = 0; u32 height = 0; u32 layers = 0; @@ -101,7 +100,7 @@ class BufferBindings; struct ImageView { View view; - vk::ImageLayout* layout = nullptr; + VkImageLayout* layout = nullptr; }; class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { @@ -137,7 +136,7 @@ public: private: struct DrawParameters { - void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const; + void Draw(vk::CommandBuffer cmdbuf) const; u32 base_instance = 0; u32 num_instances = 0; @@ -154,7 +153,7 @@ private: Texceptions UpdateAttachments(); - std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass); + std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); /// Setups geometry buffers and state. DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, @@ -255,6 +254,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; VKRenderPassCache renderpass_cache; QuadArrayPass quad_array_pass; + QuadIndexedPass quad_indexed_pass; Uint8Pass uint8_pass; VKTextureCache texture_cache; @@ -272,7 +272,7 @@ private: u32 draw_counter = 0; // TODO(Rodrigo): Invalidate on image destruction - std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache; + std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 93f5d7ba0..4e5286a69 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -6,10 +6,10 @@ #include <vector> #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -17,7 +17,7 @@ VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} VKRenderPassCache::~VKRenderPassCache() = default; -vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { +VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { const auto [pair, is_cache_miss] = cache.try_emplace(params); auto& entry = pair->second; if (is_cache_miss) { @@ -26,9 +26,9 @@ vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) return *entry; } -UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { - std::vector<vk::AttachmentDescription> descriptors; - std::vector<vk::AttachmentReference> color_references; +vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { + std::vector<VkAttachmentDescription> descriptors; + std::vector<VkAttachmentReference> color_references; for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { const auto attachment = params.color_attachments[rt]; @@ -39,16 +39,25 @@ UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& par // TODO(Rodrigo): Add eMayAlias when it's needed. const auto color_layout = attachment.is_texception - ? vk::ImageLayout::eGeneral - : vk::ImageLayout::eColorAttachmentOptimal; - descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format, - vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, - vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, - vk::AttachmentStoreOp::eDontCare, color_layout, color_layout); - color_references.emplace_back(static_cast<u32>(rt), color_layout); + ? VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkAttachmentDescription& descriptor = descriptors.emplace_back(); + descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; + descriptor.format = format.format; + descriptor.samples = VK_SAMPLE_COUNT_1_BIT; + descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + descriptor.initialLayout = color_layout; + descriptor.finalLayout = color_layout; + + VkAttachmentReference& reference = color_references.emplace_back(); + reference.attachment = static_cast<u32>(rt); + reference.layout = color_layout; } - vk::AttachmentReference zeta_attachment_ref; + VkAttachmentReference zeta_attachment_ref; if (params.has_zeta) { const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); @@ -56,45 +65,68 @@ UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& par static_cast<u32>(params.zeta_pixel_format)); const auto zeta_layout = params.zeta_texception - ? vk::ImageLayout::eGeneral - : vk::ImageLayout::eDepthStencilAttachmentOptimal; - descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format, - vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, - vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad, - vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout); - zeta_attachment_ref = - vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout); + ? VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + VkAttachmentDescription& descriptor = descriptors.emplace_back(); + descriptor.flags = 0; + descriptor.format = format.format; + descriptor.samples = VK_SAMPLE_COUNT_1_BIT; + descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + descriptor.initialLayout = zeta_layout; + descriptor.finalLayout = zeta_layout; + + zeta_attachment_ref.attachment = static_cast<u32>(params.color_attachments.size()); + zeta_attachment_ref.layout = zeta_layout; } - const vk::SubpassDescription subpass_description( - {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()), - color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0, - nullptr); - - vk::AccessFlags access; - vk::PipelineStageFlags stage; + VkSubpassDescription subpass_description; + subpass_description.flags = 0; + subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass_description.inputAttachmentCount = 0; + subpass_description.pInputAttachments = nullptr; + subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size()); + subpass_description.pColorAttachments = color_references.data(); + subpass_description.pResolveAttachments = nullptr; + subpass_description.pDepthStencilAttachment = params.has_zeta ? &zeta_attachment_ref : nullptr; + subpass_description.preserveAttachmentCount = 0; + subpass_description.pPreserveAttachments = nullptr; + + VkAccessFlags access = 0; + VkPipelineStageFlags stage = 0; if (!color_references.empty()) { - access |= - vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; - stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput; + access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; } if (params.has_zeta) { - access |= vk::AccessFlagBits::eDepthStencilAttachmentRead | - vk::AccessFlagBits::eDepthStencilAttachmentWrite; - stage |= vk::PipelineStageFlagBits::eLateFragmentTests; + access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; } - const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access, - {}); - - const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()), - descriptors.data(), 1, &subpass_description, 1, - &subpass_dependency); - - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createRenderPassUnique(create_info, nullptr, dld); + VkSubpassDependency subpass_dependency; + subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + subpass_dependency.dstSubpass = 0; + subpass_dependency.srcStageMask = stage; + subpass_dependency.dstStageMask = stage; + subpass_dependency.srcAccessMask = 0; + subpass_dependency.dstAccessMask = access; + subpass_dependency.dependencyFlags = 0; + + VkRenderPassCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.attachmentCount = static_cast<u32>(descriptors.size()); + ci.pAttachments = descriptors.data(); + ci.subpassCount = 1; + ci.pSubpasses = &subpass_description; + ci.dependencyCount = 1; + ci.pDependencies = &subpass_dependency; + return device.GetLogical().CreateRenderPass(ci); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index b49b2db48..921b6efb5 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -12,7 +12,7 @@ #include <boost/functional/hash.hpp> #include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" namespace Vulkan { @@ -85,13 +85,13 @@ public: explicit VKRenderPassCache(const VKDevice& device); ~VKRenderPassCache(); - vk::RenderPass GetRenderPass(const RenderPassParams& params); + VkRenderPass GetRenderPass(const RenderPassParams& params); private: - UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const; + vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; const VKDevice& device; - std::unordered_map<RenderPassParams, UniqueRenderPass> cache; + std::unordered_map<RenderPassParams, vk::RenderPass> cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index 525b4bb46..dc06f545a 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -6,83 +6,83 @@ #include <optional> #include "common/assert.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { +namespace { + // TODO(Rodrigo): Fine tune these numbers. constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; constexpr std::size_t FENCES_GROW_STEP = 0x40; +VkFenceCreateInfo BuildFenceCreateInfo() { + VkFenceCreateInfo fence_ci; + fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_ci.pNext = nullptr; + fence_ci.flags = 0; + return fence_ci; +} + +} // Anonymous namespace + class CommandBufferPool final : public VKFencedPool { public: CommandBufferPool(const VKDevice& device) : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} void Allocate(std::size_t begin, std::size_t end) override { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const u32 graphics_family = device.GetGraphicsFamily(); - - auto pool = std::make_unique<Pool>(); - // Command buffers are going to be commited, recorded, executed every single usage cycle. // They are also going to be reseted when commited. - const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient | - vk::CommandPoolCreateFlagBits::eResetCommandBuffer; - const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family); - pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld); - - const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle, - vk::CommandBufferLevel::ePrimary, - static_cast<u32>(COMMAND_BUFFER_POOL_SIZE)); - pool->cmdbufs = - dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld); - - pools.push_back(std::move(pool)); + VkCommandPoolCreateInfo command_pool_ci; + command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_ci.pNext = nullptr; + command_pool_ci.flags = + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily(); + + Pool& pool = pools.emplace_back(); + pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci); + pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); } - vk::CommandBuffer Commit(VKFence& fence) { + VkCommandBuffer Commit(VKFence& fence) { const std::size_t index = CommitResource(fence); const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; - return *pools[pool_index]->cmdbufs[sub_index]; + return pools[pool_index].cmdbufs[sub_index]; } private: struct Pool { - UniqueCommandPool handle; - std::vector<UniqueCommandBuffer> cmdbufs; + vk::CommandPool handle; + vk::CommandBuffers cmdbufs; }; const VKDevice& device; - - std::vector<std::unique_ptr<Pool>> pools; + std::vector<Pool> pools; }; VKResource::VKResource() = default; VKResource::~VKResource() = default; -VKFence::VKFence(const VKDevice& device, UniqueFence handle) - : device{device}, handle{std::move(handle)} {} +VKFence::VKFence(const VKDevice& device) + : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {} VKFence::~VKFence() = default; void VKFence::Wait() { - static constexpr u64 timeout = std::numeric_limits<u64>::max(); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - switch (const auto result = dev.waitForFences(1, &*handle, true, timeout, dld)) { - case vk::Result::eSuccess: + switch (const VkResult result = handle.Wait()) { + case VK_SUCCESS: return; - case vk::Result::eErrorDeviceLost: + case VK_ERROR_DEVICE_LOST: device.ReportLoss(); [[fallthrough]]; default: - vk::throwResultException(result, "vk::waitForFences"); + throw vk::Exception(result); } } @@ -107,13 +107,11 @@ bool VKFence::Tick(bool gpu_wait, bool owner_wait) { return false; } - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); if (gpu_wait) { // Wait for the fence if it has been requested. - dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld); + (void)handle.Wait(); } else { - if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) { + if (handle.GetStatus() != VK_SUCCESS) { // Vulkan fence is not ready, not much it can do here return false; } @@ -126,7 +124,7 @@ bool VKFence::Tick(bool gpu_wait, bool owner_wait) { protected_resources.clear(); // Prepare fence for reusage. - dev.resetFences({*handle}, dld); + handle.Reset(); is_used = false; return true; } @@ -299,21 +297,16 @@ VKFence& VKResourceManager::CommitFence() { return *found_fence; } -vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { +VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { return command_buffer_pool->Commit(fence); } void VKResourceManager::GrowFences(std::size_t new_fences_count) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const vk::FenceCreateInfo fence_ci; - const std::size_t previous_size = fences.size(); fences.resize(previous_size + new_fences_count); - std::generate(fences.begin() + previous_size, fences.end(), [&]() { - return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld)); - }); + std::generate(fences.begin() + previous_size, fences.end(), + [this] { return std::make_unique<VKFence>(device); }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h index d4cbc95a5..f683d2276 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h @@ -7,7 +7,7 @@ #include <cstddef> #include <memory> #include <vector> -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -42,7 +42,7 @@ class VKFence { friend class VKResourceManager; public: - explicit VKFence(const VKDevice& device, UniqueFence handle); + explicit VKFence(const VKDevice& device); ~VKFence(); /** @@ -69,7 +69,7 @@ public: void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept; /// Retreives the fence. - operator vk::Fence() const { + operator VkFence() const { return *handle; } @@ -87,7 +87,7 @@ private: bool Tick(bool gpu_wait, bool owner_wait); const VKDevice& device; ///< Device handler - UniqueFence handle; ///< Vulkan fence + vk::Fence handle; ///< Vulkan fence std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence bool is_owned = false; ///< The fence has been commited but not released yet. bool is_used = false; ///< The fence has been commited but it has not been checked to be free. @@ -181,7 +181,7 @@ public: VKFence& CommitFence(); /// Commits an unused command buffer and protects it with a fence. - vk::CommandBuffer CommitCommandBuffer(VKFence& fence); + VkCommandBuffer CommitCommandBuffer(VKFence& fence); private: /// Allocates new fences. diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 204b7c39c..07bbcf520 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -7,64 +7,64 @@ #include <unordered_map> #include "common/assert.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/textures/texture.h" namespace Vulkan { -static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) { +namespace { + +VkBorderColor ConvertBorderColor(std::array<float, 4> color) { // TODO(Rodrigo): Manage integer border colors if (color == std::array<float, 4>{0, 0, 0, 0}) { - return vk::BorderColor::eFloatTransparentBlack; + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; } else if (color == std::array<float, 4>{0, 0, 0, 1}) { - return vk::BorderColor::eFloatOpaqueBlack; + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; } else if (color == std::array<float, 4>{1, 1, 1, 1}) { - return vk::BorderColor::eFloatOpaqueWhite; + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } + if (color[0] + color[1] + color[2] > 1.35f) { + // If color elements are brighter than roughly 0.5 average, use white border + return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + } else if (color[3] > 0.5f) { + return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; } else { - if (color[0] + color[1] + color[2] > 1.35f) { - // If color elements are brighter than roughly 0.5 average, use white border - return vk::BorderColor::eFloatOpaqueWhite; - } - if (color[3] > 0.5f) { - return vk::BorderColor::eFloatOpaqueBlack; - } - return vk::BorderColor::eFloatTransparentBlack; + return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; } } +} // Anonymous namespace + VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} VKSamplerCache::~VKSamplerCache() = default; -UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { - const float max_anisotropy{tsc.GetMaxAnisotropy()}; - const bool has_anisotropy{max_anisotropy > 1.0f}; - - const auto border_color{tsc.GetBorderColor()}; - const auto vk_border_color{TryConvertBorderColor(border_color)}; - - constexpr bool unnormalized_coords{false}; - - const vk::SamplerCreateInfo sampler_ci( - {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), - MaxwellToVK::Sampler::Filter(tsc.min_filter), - MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), - MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), - MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), - has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, - MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), - tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), - unnormalized_coords); - - const auto& dld{device.GetDispatchLoader()}; - const auto dev{device.GetLogical()}; - return dev.createSamplerUnique(sampler_ci, nullptr, dld); +vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + VkSamplerCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); + ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter); + ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); + ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter); + ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter); + ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter); + ci.mipLodBias = tsc.GetLodBias(); + ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE; + ci.maxAnisotropy = tsc.GetMaxAnisotropy(); + ci.compareEnable = tsc.depth_compare_enabled; + ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); + ci.minLod = tsc.GetMinLod(); + ci.maxLod = tsc.GetMaxLod(); + ci.borderColor = ConvertBorderColor(tsc.GetBorderColor()); + ci.unnormalizedCoordinates = VK_FALSE; + return device.GetLogical().CreateSampler(ci); } -vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const { +VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { return *sampler; } diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index 1f73b716b..a33d1c0ee 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h @@ -4,7 +4,7 @@ #pragma once -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/sampler_cache.h" #include "video_core/textures/texture.h" @@ -12,15 +12,15 @@ namespace Vulkan { class VKDevice; -class VKSamplerCache final : public VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> { +class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> { public: explicit VKSamplerCache(const VKDevice& device); ~VKSamplerCache(); protected: - UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; + vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; - vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override; + VkSampler ToSamplerType(const vk::Sampler& sampler) const override; private: const VKDevice& device; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index b61d4fe63..ae7ba3eb5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -10,23 +10,22 @@ #include "common/assert.h" #include "common/microprofile.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_WaitForWorker); -void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, - const vk::DispatchLoaderDynamic& dld) { +void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { auto command = first; while (command != nullptr) { auto next = command->GetNext(); - command->Execute(cmdbuf, dld); + command->Execute(cmdbuf); command->~Command(); command = next; } @@ -51,7 +50,7 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { +void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) { SubmitExecution(semaphore); if (release_fence) { current_fence->Release(); @@ -59,7 +58,7 @@ void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { AllocateNewContext(); } -void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { +void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) { SubmitExecution(semaphore); current_fence->Wait(); if (release_fence) { @@ -89,17 +88,34 @@ void VKScheduler::DispatchWork() { AcquireNewChunk(); } -void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) { - if (state.renderpass && renderpass_bi == *state.renderpass) { +void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, + VkExtent2D render_area) { + if (renderpass == state.renderpass && framebuffer == state.framebuffer && + render_area.width == state.render_area.width && + render_area.height == state.render_area.height) { return; } - const bool end_renderpass = state.renderpass.has_value(); - state.renderpass = renderpass_bi; - Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) { + const bool end_renderpass = state.renderpass != nullptr; + state.renderpass = renderpass; + state.framebuffer = framebuffer; + state.render_area = render_area; + + VkRenderPassBeginInfo renderpass_bi; + renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + renderpass_bi.pNext = nullptr; + renderpass_bi.renderPass = renderpass; + renderpass_bi.framebuffer = framebuffer; + renderpass_bi.renderArea.offset.x = 0; + renderpass_bi.renderArea.offset.y = 0; + renderpass_bi.renderArea.extent = render_area; + renderpass_bi.clearValueCount = 0; + renderpass_bi.pClearValues = nullptr; + + Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { if (end_renderpass) { - cmdbuf.endRenderPass(dld); + cmdbuf.EndRenderPass(); } - cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); }); } @@ -107,13 +123,13 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) { +void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { if (state.graphics_pipeline == pipeline) { return; } state.graphics_pipeline = pipeline; - Record([pipeline](auto cmdbuf, auto& dld) { - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); + Record([pipeline](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); }); } @@ -126,37 +142,58 @@ void VKScheduler::WorkerThread() { } auto extracted_chunk = std::move(chunk_queue.Front()); chunk_queue.Pop(); - extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader()); + extracted_chunk->ExecuteAll(current_cmdbuf); chunk_reserve.Push(std::move(extracted_chunk)); } while (!quit); } -void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { +void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); WaitWorker(); std::unique_lock lock{mutex}; - const auto queue = device.GetGraphicsQueue(); - const auto& dld = device.GetDispatchLoader(); - current_cmdbuf.end(dld); - - const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, ¤t_cmdbuf, semaphore ? 1U : 0U, - &semaphore); - queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld); + current_cmdbuf.End(); + + VkSubmitInfo submit_info; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = nullptr; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = nullptr; + submit_info.pWaitDstStageMask = nullptr; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = current_cmdbuf.address(); + submit_info.signalSemaphoreCount = semaphore ? 1 : 0; + submit_info.pSignalSemaphores = &semaphore; + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) { + case VK_SUCCESS: + break; + case VK_ERROR_DEVICE_LOST: + device.ReportLoss(); + [[fallthrough]]; + default: + vk::Check(result); + } } void VKScheduler::AllocateNewContext() { ++ticks; + VkCommandBufferBeginInfo cmdbuf_bi; + cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cmdbuf_bi.pNext = nullptr; + cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + cmdbuf_bi.pInheritanceInfo = nullptr; + std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); - current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); - current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, - device.GetDispatchLoader()); + current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), + device.GetDispatchLoader()); + current_cmdbuf.Begin(cmdbuf_bi); + // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { query_cache->UpdateCounters(); @@ -177,8 +214,8 @@ void VKScheduler::EndRenderPass() { if (!state.renderpass) { return; } - state.renderpass = std::nullopt; - Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); }); + state.renderpass = nullptr; + Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); } void VKScheduler::AcquireNewChunk() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index c7cc291c3..82a8adc69 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -13,7 +13,7 @@ #include <utility> #include "common/common_types.h" #include "common/threadsafe_queue.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -49,10 +49,10 @@ public: ~VKScheduler(); /// Sends the current execution context to the GPU. - void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); + void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); + void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -62,14 +62,15 @@ public: void DispatchWork(); /// Requests to begin a renderpass. - void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi); + void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, + VkExtent2D render_area); /// Requests the current executino context to be able to execute operations only allowed outside /// of a renderpass. void RequestOutsideRenderPassOperationContext(); /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(vk::Pipeline pipeline); + void BindGraphicsPipeline(VkPipeline pipeline); /// Assigns the query cache. void SetQueryCache(VKQueryCache& query_cache_) { @@ -101,8 +102,7 @@ private: public: virtual ~Command() = default; - virtual void Execute(vk::CommandBuffer cmdbuf, - const vk::DispatchLoaderDynamic& dld) const = 0; + virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; Command* GetNext() const { return next; @@ -125,9 +125,8 @@ private: TypedCommand(TypedCommand&&) = delete; TypedCommand& operator=(TypedCommand&&) = delete; - void Execute(vk::CommandBuffer cmdbuf, - const vk::DispatchLoaderDynamic& dld) const override { - command(cmdbuf, dld); + void Execute(vk::CommandBuffer cmdbuf) const override { + command(cmdbuf); } private: @@ -136,7 +135,7 @@ private: class CommandChunk final { public: - void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld); + void ExecuteAll(vk::CommandBuffer cmdbuf); template <typename T> bool Record(T& command) { @@ -175,7 +174,7 @@ private: void WorkerThread(); - void SubmitExecution(vk::Semaphore semaphore); + void SubmitExecution(VkSemaphore semaphore); void AllocateNewContext(); @@ -198,8 +197,10 @@ private: VKFence* next_fence = nullptr; struct State { - std::optional<vk::RenderPassBeginInfo> renderpass; - vk::Pipeline graphics_pipeline; + VkRenderPass renderpass = nullptr; + VkFramebuffer framebuffer = nullptr; + VkExtent2D render_area = {0, 0}; + VkPipeline graphics_pipeline = nullptr; } state; std::unique_ptr<CommandChunk> chunk; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b9f9e2714..aaa138f52 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -801,7 +801,7 @@ private: if (IsOutputAttributeArray()) { const u32 num = GetNumOutputVertices(); type = TypeArray(type, Constant(t_uint, num)); - if (device.GetDriverID() != vk::DriverIdKHR::eIntelProprietaryWindows) { + if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { // Intel's proprietary driver fails to setup defaults for arrayed output // attributes. varying_default = ConstantComposite(type, std::vector(num, varying_default)); @@ -1938,11 +1938,8 @@ private: return {}; } - template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, - Type value_type = result_type> + template <Id (Module::*func)(Id, Id, Id, Id, Id)> Expression Atomic(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - Id pointer; if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { pointer = GetSharedMemoryPointer(*smem); @@ -1950,15 +1947,19 @@ private: pointer = GetGlobalMemoryPointer(*gmem); } else { UNREACHABLE(); - return {Constant(type_def, 0), result_type}; + return {v_float_zero, Type::Float}; } - - const Id value = As(Visit(operation[1]), value_type); - const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = Constant(type_def, 0); + const Id semantics = Constant(t_uint, 0); + const Id value = AsUint(Visit(operation[1])); + + return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; + } - return {(this->*func)(type_def, pointer, scope, semantics, value), result_type}; + template <Id (Module::*func)(Id, Id, Id, Id, Id)> + Expression Reduce(Operation operation) { + Atomic<func>(operation); + return {}; } Expression Branch(Operation operation) { @@ -2547,21 +2548,35 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, + + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, + + &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, + + &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index b97c4cb3d..784839327 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -8,27 +8,25 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { -UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { +vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { // Avoid undefined behavior by copying to a staging allocation ASSERT(code_size % sizeof(u32) == 0); const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); std::memcpy(data.get(), code_data, code_size); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get()); - vk::ShaderModule shader_module; - if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) { - UNREACHABLE_MSG("Shader module failed to build!"); - } - - return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld)); + VkShaderModuleCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.codeSize = code_size; + ci.pCode = data.get(); + return device.GetLogical().CreateShaderModule(ci); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index c06d65970..be38d6697 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -6,12 +6,12 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { class VKDevice; -UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); +vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 374959f82..94d954d7a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -13,6 +13,7 @@ #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -71,17 +72,23 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ } VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { - const auto usage = - vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | - vk::BufferUsageFlagBits::eIndexBuffer; const u32 log2 = Common::Log2Ceil64(size); - const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, - nullptr); - const auto dev = device.GetLogical(); + + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = 1ULL << log2; + ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + auto buffer = std::make_unique<VKBuffer>(); - buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader()); - buffer->commit = memory_manager.Commit(*buffer->handle, host_visible); + buffer->handle = device.GetLogical().CreateBuffer(ci); + buffer->commit = memory_manager.Commit(buffer->handle, host_visible); auto& entries = GetCache(host_visible)[log2].entries; return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 4d9488f49..a0840ff8c 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -11,9 +11,9 @@ #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -22,7 +22,7 @@ class VKFenceWatch; class VKScheduler; struct VKBuffer final { - UniqueBuffer handle; + vk::Buffer handle; VKMemoryCommit commit; }; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index d48d3b44c..868447af2 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -3,17 +3,18 @@ // Refer to the license.txt file included. #include <algorithm> +#include <limits> #include <optional> #include <tuple> #include <vector> #include "common/alignment.h" #include "common/assert.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -22,26 +23,42 @@ namespace { constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; -constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; +constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024; -std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, - vk::MemoryPropertyFlags wanted) { - const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); - for (u32 i = 0; i < properties.memoryTypeCount; i++) { - if (!(filter & (1 << i))) { - continue; - } - if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { +/// Find a memory type with the passed requirements +std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties, + VkMemoryPropertyFlags wanted, + u32 filter = std::numeric_limits<u32>::max()) { + for (u32 i = 0; i < properties.memoryTypeCount; ++i) { + const auto flags = properties.memoryTypes[i].propertyFlags; + if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) { return i; } } - return {}; + return std::nullopt; +} + +/// Get the preferred host visible memory type. +u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, + u32 filter = std::numeric_limits<u32>::max()) { + // Prefer device local host visible allocations. Both AMD and Nvidia now provide one. + // Otherwise search for a host visible allocation. + static constexpr auto HOST_MEMORY = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY); + if (!preferred_type) { + preferred_type = FindMemoryType(properties, HOST_MEMORY); + ASSERT_MSG(preferred_type, "No host visible and coherent memory type found"); + } + return preferred_type.value_or(0); } } // Anonymous namespace VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, - vk::BufferUsageFlags usage) + VkBufferUsageFlags usage) : device{device}, scheduler{scheduler} { CreateBuffers(usage); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); @@ -51,7 +68,7 @@ VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, VKStreamBuffer::~VKStreamBuffer() = default; std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { - ASSERT(size <= STREAM_BUFFER_SIZE); + ASSERT(size <= stream_buffer_size); mapped_size = size; if (alignment > 0) { @@ -61,7 +78,7 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { WaitPendingOperations(offset); bool invalidated = false; - if (offset + size > STREAM_BUFFER_SIZE) { + if (offset + size > stream_buffer_size) { // The buffer would overflow, save the amount of used watches and reset the state. invalidation_mark = current_watch_cursor; current_watch_cursor = 0; @@ -78,17 +95,13 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { invalidated = true; } - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld)); - return {pointer, offset, invalidated}; + return {memory.Map(offset, size), offset, invalidated}; } void VKStreamBuffer::Unmap(u64 size) { ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); - const auto dev = device.GetLogical(); - dev.unmapMemory(*memory, device.GetDispatchLoader()); + memory.Unmap(); offset += size; @@ -101,30 +114,39 @@ void VKStreamBuffer::Unmap(u64 size) { watch.fence.Watch(scheduler.GetFence()); } -void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { - const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, - 0, nullptr); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); - - const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); - // Prefer device local host visible allocations (this should hit AMD's pinned memory). - auto type = FindMemoryType(device, requirements.memoryTypeBits, - vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent | - vk::MemoryPropertyFlagBits::eDeviceLocal); - if (!type) { - // Otherwise search for a host visible allocation. - type = FindMemoryType(device, requirements.memoryTypeBits, - vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent); - ASSERT_MSG(type, "No host visible and coherent memory type found"); - } - const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); - memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); - - dev.bindBufferMemory(*buffer, *memory, 0, dld); +void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { + const auto memory_properties = device.GetPhysical().GetMemoryProperties(); + const u32 preferred_type = GetMemoryType(memory_properties); + const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; + + // Substract from the preferred heap size some bytes to avoid getting out of memory. + const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; + const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024; + + VkBufferCreateInfo buffer_ci; + buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_ci.pNext = nullptr; + buffer_ci.flags = 0; + buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); + buffer_ci.usage = usage; + buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_ci.queueFamilyIndexCount = 0; + buffer_ci.pQueueFamilyIndices = nullptr; + + buffer = device.GetLogical().CreateBuffer(buffer_ci); + + const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); + const u32 required_flags = requirements.memoryTypeBits; + stream_buffer_size = static_cast<u64>(requirements.size); + + VkMemoryAllocateInfo memory_ai; + memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_ai.pNext = nullptr; + memory_ai.allocationSize = requirements.size; + memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); + + memory = device.GetLogical().AllocateMemory(memory_ai); + buffer.BindMemory(*memory, 0); } void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 187c0c612..dfddf7ad6 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -9,7 +9,7 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -21,7 +21,7 @@ class VKScheduler; class VKStreamBuffer final { public: explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, - vk::BufferUsageFlags usage); + VkBufferUsageFlags usage); ~VKStreamBuffer(); /** @@ -35,7 +35,7 @@ public: /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Unmap(u64 size); - vk::Buffer GetHandle() const { + VkBuffer GetHandle() const { return *buffer; } @@ -46,20 +46,19 @@ private: }; /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(vk::BufferUsageFlags usage); + void CreateBuffers(VkBufferUsageFlags usage); /// Increases the amount of watches available. void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); void WaitPendingOperations(u64 requested_upper_bound); - const VKDevice& device; ///< Vulkan device manager. - VKScheduler& scheduler; ///< Command scheduler. - const vk::AccessFlags access; ///< Access usage of this stream buffer. - const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. + const VKDevice& device; ///< Vulkan device manager. + VKScheduler& scheduler; ///< Command scheduler. - UniqueBuffer buffer; ///< Mapped buffer. - UniqueDeviceMemory memory; ///< Memory allocation. + vk::Buffer buffer; ///< Mapped buffer. + vk::DeviceMemory memory; ///< Memory allocation. + u64 stream_buffer_size{}; ///< Stream buffer size. u64 offset{}; ///< Buffer iterator. u64 mapped_size{}; ///< Size reserved for the current copy. diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 9e73fa9cd..bffd8f32a 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -11,69 +11,64 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/frontend/framebuffer_layout.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { namespace { -vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats, - bool srgb) { - if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { - vk::SurfaceFormatKHR format; - format.format = vk::Format::eB8G8R8A8Unorm; - format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear; +VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) { + if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) { + VkSurfaceFormatKHR format; + format.format = VK_FORMAT_B8G8R8A8_UNORM; + format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; return format; } const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { - const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm; + const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; return format.format == request_format && - format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear; + format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; }); return found != formats.end() ? *found : formats[0]; } -vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) { +VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) { // Mailbox doesn't lock the application like fifo (vsync), prefer it - const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) { - return mode == vk::PresentModeKHR::eMailbox; - }); - return found != modes.end() ? *found : vk::PresentModeKHR::eFifo; + const auto found = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR); + return found != modes.end() ? *found : VK_PRESENT_MODE_FIFO_KHR; } -vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, - u32 height) { +VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) { constexpr auto undefined_size{std::numeric_limits<u32>::max()}; if (capabilities.currentExtent.width != undefined_size) { return capabilities.currentExtent; } - vk::Extent2D extent = {width, height}; + VkExtent2D extent; extent.width = std::max(capabilities.minImageExtent.width, - std::min(capabilities.maxImageExtent.width, extent.width)); + std::min(capabilities.maxImageExtent.width, width)); extent.height = std::max(capabilities.minImageExtent.height, - std::min(capabilities.maxImageExtent.height, extent.height)); + std::min(capabilities.maxImageExtent.height, height)); return extent; } } // Anonymous namespace -VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device) +VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device) : surface{surface}, device{device} {} VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { - const auto& dld = device.GetDispatchLoader(); const auto physical_device = device.GetPhysical(); - const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)}; + const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { return; } - device.GetLogical().waitIdle(dld); + device.GetLogical().WaitIdle(); Destroy(); CreateSwapchain(capabilities, width, height, srgb); @@ -84,10 +79,8 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { } void VKSwapchain::AcquireNextImage() { - const auto dev{device.GetLogical()}; - const auto& dld{device.GetDispatchLoader()}; - dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), - *present_semaphores[frame_index], {}, &image_index, dld); + device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), + *present_semaphores[frame_index], {}, &image_index); if (auto& fence = fences[image_index]; fence) { fence->Wait(); @@ -96,29 +89,37 @@ void VKSwapchain::AcquireNextImage() { } } -bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) { - const vk::Semaphore present_semaphore{*present_semaphores[frame_index]}; - const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore}; - const u32 wait_semaphore_count{render_semaphore ? 2U : 1U}; - const auto& dld{device.GetDispatchLoader()}; +bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { + const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; + const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; bool recreated = false; - const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1, - &swapchain.get(), &image_index, {}); - switch (const auto result = present_queue.presentKHR(&present_info, dld); result) { - case vk::Result::eSuccess: + VkPresentInfoKHR present_info; + present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present_info.pNext = nullptr; + present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U; + present_info.pWaitSemaphores = semaphores.data(); + present_info.swapchainCount = 1; + present_info.pSwapchains = swapchain.address(); + present_info.pImageIndices = &image_index; + present_info.pResults = nullptr; + + switch (const VkResult result = present_queue.Present(present_info)) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; - case vk::Result::eErrorOutOfDateKHR: + case VK_ERROR_OUT_OF_DATE_KHR: if (current_width > 0 && current_height > 0) { Create(current_width, current_height, current_srgb); recreated = true; } break; default: - LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!", - vk::to_string(result)); - UNREACHABLE(); + LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); + break; } ASSERT(fences[image_index] == nullptr); @@ -132,74 +133,92 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu return framebuffer.width != current_width || framebuffer.height != current_height; } -void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, +void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb) { - const auto& dld{device.GetDispatchLoader()}; const auto physical_device{device.GetPhysical()}; - const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)}; - const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)}; + const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; + const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; - const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; - const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; + const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; + const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; u32 requested_image_count{capabilities.minImageCount + 1}; if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; } - vk::SwapchainCreateInfoKHR swapchain_ci( - {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1, - vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform, - vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {}); + VkSwapchainCreateInfoKHR swapchain_ci; + swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + swapchain_ci.pNext = nullptr; + swapchain_ci.flags = 0; + swapchain_ci.surface = surface; + swapchain_ci.minImageCount = requested_image_count; + swapchain_ci.imageFormat = surface_format.format; + swapchain_ci.imageColorSpace = surface_format.colorSpace; + swapchain_ci.imageArrayLayers = 1; + swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + swapchain_ci.queueFamilyIndexCount = 0; + swapchain_ci.pQueueFamilyIndices = nullptr; + swapchain_ci.preTransform = capabilities.currentTransform; + swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + swapchain_ci.presentMode = present_mode; + swapchain_ci.clipped = VK_FALSE; + swapchain_ci.oldSwapchain = nullptr; const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; const std::array<u32, 2> queue_indices{graphics_family, present_family}; if (graphics_family != present_family) { - swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent; + swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT; swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } else { - swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; + swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; } // Request the size again to reduce the possibility of a TOCTOU race condition. - const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld); + const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); // Don't add code within this and the swapchain creation. - const auto dev{device.GetLogical()}; - swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); + swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); extent = swapchain_ci.imageExtent; current_width = extent.width; current_height = extent.height; current_srgb = srgb; - images = dev.getSwapchainImagesKHR(*swapchain, dld); + images = swapchain.GetImages(); image_count = static_cast<u32>(images.size()); image_format = surface_format.format; } void VKSwapchain::CreateSemaphores() { - const auto dev{device.GetLogical()}; - const auto& dld{device.GetDispatchLoader()}; - present_semaphores.resize(image_count); - for (std::size_t i = 0; i < image_count; i++) { - present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); - } + std::generate(present_semaphores.begin(), present_semaphores.end(), + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKSwapchain::CreateImageViews() { - const auto dev{device.GetLogical()}; - const auto& dld{device.GetDispatchLoader()}; + VkImageViewCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + // ci.image + ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + ci.format = image_format; + ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + ci.subresourceRange.baseMipLevel = 0; + ci.subresourceRange.levelCount = 1; + ci.subresourceRange.baseArrayLayer = 0; + ci.subresourceRange.layerCount = 1; image_views.resize(image_count); for (std::size_t i = 0; i < image_count; i++) { - const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D, - image_format, {}, - {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}); - image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld); + ci.image = images[i]; + image_views[i] = device.GetLogical().CreateImageView(ci); } } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 2f3b2ccd5..a35d61345 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -7,7 +7,7 @@ #include <vector> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Layout { struct FramebufferLayout; @@ -20,7 +20,7 @@ class VKFence; class VKSwapchain { public: - explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device); + explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device); ~VKSwapchain(); /// Creates (or recreates) the swapchain with a given size. @@ -31,12 +31,12 @@ public: /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be /// recreated. Takes responsability for the ownership of fence. - bool Present(vk::Semaphore render_semaphore, VKFence& fence); + bool Present(VkSemaphore render_semaphore, VKFence& fence); /// Returns true when the framebuffer layout has changed. bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; - const vk::Extent2D& GetSize() const { + VkExtent2D GetSize() const { return extent; } @@ -48,15 +48,15 @@ public: return image_index; } - vk::Image GetImageIndex(std::size_t index) const { + VkImage GetImageIndex(std::size_t index) const { return images[index]; } - vk::ImageView GetImageViewIndex(std::size_t index) const { + VkImageView GetImageViewIndex(std::size_t index) const { return *image_views[index]; } - vk::Format GetImageFormat() const { + VkFormat GetImageFormat() const { return image_format; } @@ -65,30 +65,30 @@ public: } private: - void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, + void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); void CreateSemaphores(); void CreateImageViews(); void Destroy(); - const vk::SurfaceKHR surface; + const VkSurfaceKHR surface; const VKDevice& device; - UniqueSwapchainKHR swapchain; + vk::SwapchainKHR swapchain; std::size_t image_count{}; - std::vector<vk::Image> images; - std::vector<UniqueImageView> image_views; - std::vector<UniqueFramebuffer> framebuffers; + std::vector<VkImage> images; + std::vector<vk::ImageView> image_views; + std::vector<vk::Framebuffer> framebuffers; std::vector<VKFence*> fences; - std::vector<UniqueSemaphore> present_semaphores; + std::vector<vk::Semaphore> present_semaphores; u32 image_index{}; u32 frame_index{}; - vk::Format image_format{}; - vk::Extent2D extent{}; + VkFormat image_format{}; + VkExtent2D extent{}; u32 current_width{}; u32 current_height{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 5b9b39670..de4c23120 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -17,7 +17,6 @@ #include "core/memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/morton.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" @@ -25,6 +24,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/surface.h" #include "video_core/textures/convert.h" @@ -39,18 +39,18 @@ using VideoCore::Surface::SurfaceTarget; namespace { -vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { +VkImageType SurfaceTargetToImage(SurfaceTarget target) { switch (target) { case SurfaceTarget::Texture1D: case SurfaceTarget::Texture1DArray: - return vk::ImageType::e1D; + return VK_IMAGE_TYPE_1D; case SurfaceTarget::Texture2D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - return vk::ImageType::e2D; + return VK_IMAGE_TYPE_2D; case SurfaceTarget::Texture3D: - return vk::ImageType::e3D; + return VK_IMAGE_TYPE_3D; case SurfaceTarget::TextureBuffer: UNREACHABLE(); return {}; @@ -59,35 +59,35 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { return {}; } -vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { +VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { if (pixel_format < PixelFormat::MaxColorFormat) { - return vk::ImageAspectFlagBits::eColor; + return VK_IMAGE_ASPECT_COLOR_BIT; } else if (pixel_format < PixelFormat::MaxDepthFormat) { - return vk::ImageAspectFlagBits::eDepth; + return VK_IMAGE_ASPECT_DEPTH_BIT; } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { - return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; } else { - UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format)); - return vk::ImageAspectFlagBits::eColor; + UNREACHABLE_MSG("Invalid pixel format={}", static_cast<int>(pixel_format)); + return VK_IMAGE_ASPECT_COLOR_BIT; } } -vk::ImageViewType GetImageViewType(SurfaceTarget target) { +VkImageViewType GetImageViewType(SurfaceTarget target) { switch (target) { case SurfaceTarget::Texture1D: - return vk::ImageViewType::e1D; + return VK_IMAGE_VIEW_TYPE_1D; case SurfaceTarget::Texture2D: - return vk::ImageViewType::e2D; + return VK_IMAGE_VIEW_TYPE_2D; case SurfaceTarget::Texture3D: - return vk::ImageViewType::e3D; + return VK_IMAGE_VIEW_TYPE_3D; case SurfaceTarget::Texture1DArray: - return vk::ImageViewType::e1DArray; + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; case SurfaceTarget::Texture2DArray: - return vk::ImageViewType::e2DArray; + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; case SurfaceTarget::TextureCubemap: - return vk::ImageViewType::eCube; + return VK_IMAGE_VIEW_TYPE_CUBE; case SurfaceTarget::TextureCubeArray: - return vk::ImageViewType::eCubeArray; + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; case SurfaceTarget::TextureBuffer: break; } @@ -95,73 +95,88 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) { return {}; } -UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, - std::size_t host_memory_size) { +vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, + std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - const vk::BufferCreateInfo buffer_ci({}, host_memory_size, - vk::BufferUsageFlagBits::eUniformTexelBuffer | - vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eTransferDst, - vk::SharingMode::eExclusive, 0, nullptr); - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - return dev.createBufferUnique(buffer_ci, nullptr, dld); + VkBufferCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.size = static_cast<VkDeviceSize>(host_memory_size); + ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + return device.GetLogical().CreateBuffer(ci); } -vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, - const SurfaceParams& params, - vk::Buffer buffer, - std::size_t host_memory_size) { +VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, + const SurfaceParams& params, VkBuffer buffer, + std::size_t host_memory_size) { ASSERT(params.IsBuffer()); - const auto format = - MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - return vk::BufferViewCreateInfo({}, buffer, format, 0, host_memory_size); + VkBufferViewCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.buffer = buffer; + ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; + ci.offset = 0; + ci.range = static_cast<VkDeviceSize>(host_memory_size); + return ci; } -vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { - constexpr auto sample_count = vk::SampleCountFlagBits::e1; - constexpr auto tiling = vk::ImageTiling::eOptimal; - +VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { ASSERT(!params.IsBuffer()); const auto [format, attachable, storage] = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); - auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | - vk::ImageUsageFlagBits::eTransferSrc; + VkImageCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.imageType = SurfaceTargetToImage(params.target); + ci.format = format; + ci.mipLevels = params.num_levels; + ci.arrayLayers = static_cast<u32>(params.GetNumLayers()); + ci.samples = VK_SAMPLE_COUNT_1_BIT; + ci.tiling = VK_IMAGE_TILING_OPTIMAL; + ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ci.queueFamilyIndexCount = 0; + ci.pQueueFamilyIndices = nullptr; + ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (attachable) { - image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment - : vk::ImageUsageFlagBits::eColorAttachment; + ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT + : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; } if (storage) { - image_usage |= vk::ImageUsageFlagBits::eStorage; + ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; } - vk::ImageCreateFlags flags; - vk::Extent3D extent; switch (params.target) { case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - flags |= vk::ImageCreateFlagBits::eCubeCompatible; + ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; [[fallthrough]]; case SurfaceTarget::Texture1D: case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2D: case SurfaceTarget::Texture2DArray: - extent = vk::Extent3D(params.width, params.height, 1); + ci.extent = {params.width, params.height, 1}; break; case SurfaceTarget::Texture3D: - extent = vk::Extent3D(params.width, params.height, params.depth); + ci.extent = {params.width, params.height, params.depth}; break; case SurfaceTarget::TextureBuffer: UNREACHABLE(); } - return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent, - params.num_levels, static_cast<u32>(params.GetNumLayers()), - sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0, - nullptr, vk::ImageLayout::eUndefined); + return ci; } } // Anonymous namespace @@ -175,15 +190,13 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { if (params.IsBuffer()) { buffer = CreateBuffer(device, params, host_memory_size); - commit = memory_manager.Commit(*buffer, false); + commit = memory_manager.Commit(buffer, false); const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); format = buffer_view_ci.format; - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld); + buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); } else { const auto image_ci = GenerateImageCreateInfo(device, params); format = image_ci.format; @@ -221,16 +234,15 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { // We can't copy images to buffers inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); - FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, - vk::ImageLayout::eTransferSrcOptimal); + FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); // TODO(Rodrigo): Do this in a single copy for (u32 level = 0; level < params.num_levels; ++level) { - scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle, - copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) { - cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy}, - dld); + scheduler.Record([image = *image->GetHandle(), buffer = *buffer.handle, + copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); }); } scheduler.Finish(); @@ -257,15 +269,27 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, - size = host_memory_size](auto cmdbuf, auto& dld) { - const vk::BufferCopy copy(0, 0, size); - cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld); - - cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {}, - {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, - vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)}, - {}, dld); + size = host_memory_size](vk::CommandBuffer cmdbuf) { + VkBufferCopy copy; + copy.srcOffset = 0; + copy.dstOffset = 0; + copy.size = size; + cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + barrier.dstAccessMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + barrier.srcQueueFamilyIndex = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstQueueFamilyIndex = VK_ACCESS_SHADER_READ_BIT; + barrier.srcQueueFamilyIndex = 0; + barrier.dstQueueFamilyIndex = 0; + barrier.buffer = dst_buffer; + barrier.offset = 0; + barrier.size = size; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, {}, barrier, {}); }); } @@ -273,43 +297,49 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); - FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite, - vk::ImageLayout::eTransferDstOptimal); + FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); for (u32 level = 0; level < params.num_levels; ++level) { - vk::BufferImageCopy copy = GetBufferImageCopy(level); - if (image->GetAspectMask() == - (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { - vk::BufferImageCopy depth = copy; - vk::BufferImageCopy stencil = copy; - depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; - stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; - scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth, - stencil](auto cmdbuf, auto& dld) { - cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, - {depth, stencil}, dld); + const VkBufferImageCopy copy = GetBufferImageCopy(level); + if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), + copy](vk::CommandBuffer cmdbuf) { + std::array<VkBufferImageCopy, 2> copies = {copy, copy}; + copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + copies); }); } else { - scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), - copy](auto cmdbuf, auto& dld) { - cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, - {copy}, dld); + scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), + copy](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); }); } } } -vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); - - return vk::BufferImageCopy( - mip_offset, 0, 0, - {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0}, - {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth}); +VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { + VkBufferImageCopy copy; + copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted); + copy.bufferRowLength = 0; + copy.bufferImageHeight = 0; + copy.imageSubresource.aspectMask = image->GetAspectMask(); + copy.imageSubresource.mipLevel = level; + copy.imageSubresource.baseArrayLayer = 0; + copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers()); + copy.imageOffset.x = 0; + copy.imageOffset.y = 0; + copy.imageOffset.z = 0; + copy.imageExtent.width = params.GetMipWidth(level); + copy.imageExtent.height = params.GetMipHeight(level); + copy.imageExtent.depth = + params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; + return copy; } -vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { +VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { return {image->GetAspectMask(), 0, params.num_levels, 0, static_cast<u32>(params.GetNumLayers())}; } @@ -321,12 +351,12 @@ CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surf aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) - : vk::ImageViewType{}} {} + : VK_IMAGE_VIEW_TYPE_1D} {} CachedSurfaceView::~CachedSurfaceView() = default; -vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, - SwizzleSource z_source, SwizzleSource w_source) { +VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, + SwizzleSource z_source, SwizzleSource w_source) { const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); if (last_image_view && last_swizzle == swizzle) { return last_image_view; @@ -351,37 +381,45 @@ vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource // Games can sample depth or stencil values on textures. This is decided by the swizzle value on // hardware. To emulate this on Vulkan we specify it in the aspect. - vk::ImageAspectFlags aspect = aspect_mask; - if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { + VkImageAspectFlags aspect = aspect_mask; + if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); const bool is_first = x_source == SwizzleSource::R; switch (params.pixel_format) { case VideoCore::Surface::PixelFormat::Z24S8: case VideoCore::Surface::PixelFormat::Z32FS8: - aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil; + aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; break; case VideoCore::Surface::PixelFormat::S8Z24: - aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth; + aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; break; default: - aspect = vk::ImageAspectFlagBits::eDepth; + aspect = VK_IMAGE_ASPECT_DEPTH_BIT; UNIMPLEMENTED(); } // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity - swizzle_x = vk::ComponentSwizzle::eR; - swizzle_y = vk::ComponentSwizzle::eG; - swizzle_z = vk::ComponentSwizzle::eB; - swizzle_w = vk::ComponentSwizzle::eA; + swizzle_x = VK_COMPONENT_SWIZZLE_R; + swizzle_y = VK_COMPONENT_SWIZZLE_G; + swizzle_z = VK_COMPONENT_SWIZZLE_B; + swizzle_w = VK_COMPONENT_SWIZZLE_A; } - const vk::ImageViewCreateInfo image_view_ci( - {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(), - {swizzle_x, swizzle_y, swizzle_z, swizzle_w}, - {aspect, base_level, num_levels, base_layer, num_layers}); + VkImageViewCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.image = surface.GetImageHandle(); + ci.viewType = image_view_type; + ci.format = surface.GetImage().GetFormat(); + ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; + ci.subresourceRange.aspectMask = aspect; + ci.subresourceRange.baseMipLevel = base_level; + ci.subresourceRange.levelCount = num_levels; + ci.subresourceRange.baseArrayLayer = base_layer; + ci.subresourceRange.layerCount = num_layers; + image_view = device.GetLogical().CreateImageView(ci); - const auto dev = device.GetLogical(); - image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader()); return last_image_view = *image_view; } @@ -418,25 +456,36 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, scheduler.RequestOutsideRenderPassOperationContext(); src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, - vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, - vk::ImageLayout::eTransferSrcOptimal); - dst_surface->Transition( - dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); - - const vk::ImageSubresourceLayers src_subresource( - src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); - const vk::ImageSubresourceLayers dst_subresource( - dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers); - const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0); - const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z); - const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z); - const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent); - const vk::Image src_image = src_surface->GetImageHandle(); - const vk::Image dst_image = dst_surface->GetImageHandle(); - scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) { - cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, - vk::ImageLayout::eTransferDstOptimal, {copy}, dld); + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkImageCopy copy; + copy.srcSubresource.aspectMask = src_surface->GetAspectMask(); + copy.srcSubresource.mipLevel = copy_params.source_level; + copy.srcSubresource.baseArrayLayer = copy_params.source_z; + copy.srcSubresource.layerCount = num_layers; + copy.srcOffset.x = copy_params.source_x; + copy.srcOffset.y = copy_params.source_y; + copy.srcOffset.z = 0; + copy.dstSubresource.aspectMask = dst_surface->GetAspectMask(); + copy.dstSubresource.mipLevel = copy_params.dest_level; + copy.dstSubresource.baseArrayLayer = dst_base_layer; + copy.dstSubresource.layerCount = num_layers; + copy.dstOffset.x = copy_params.dest_x; + copy.dstOffset.y = copy_params.dest_y; + copy.dstOffset.z = dst_offset_z; + copy.extent.width = copy_params.width; + copy.extent.height = copy_params.height; + copy.extent.depth = extent_z; + + const VkImage src_image = src_surface->GetImageHandle(); + const VkImage dst_image = dst_surface->GetImageHandle(); + scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { + cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); }); } @@ -445,25 +494,34 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view, // We can't blit inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); - src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferRead); - dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite); - - const auto& cfg = copy_config; - const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0); - const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1); - const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0); - const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1); - const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right}, - dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); + src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_READ_BIT); + dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT); + + VkImageBlit blit; + blit.srcSubresource = src_view->GetImageSubresourceLayers(); + blit.srcOffsets[0].x = copy_config.src_rect.left; + blit.srcOffsets[0].y = copy_config.src_rect.top; + blit.srcOffsets[0].z = 0; + blit.srcOffsets[1].x = copy_config.src_rect.right; + blit.srcOffsets[1].y = copy_config.src_rect.bottom; + blit.srcOffsets[1].z = 1; + blit.dstSubresource = dst_view->GetImageSubresourceLayers(); + blit.dstOffsets[0].x = copy_config.dst_rect.left; + blit.dstOffsets[0].y = copy_config.dst_rect.top; + blit.dstOffsets[0].z = 0; + blit.dstOffsets[1].x = copy_config.dst_rect.right; + blit.dstOffsets[1].y = copy_config.dst_rect.bottom; + blit.dstOffsets[1].z = 1; + const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, - is_linear](auto cmdbuf, auto& dld) { - cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, - vk::ImageLayout::eTransferDstOptimal, {blit}, - is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld); + is_linear](vk::CommandBuffer cmdbuf) { + cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, + is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); }); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 22e3d34de..115595f28 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -13,10 +13,10 @@ #include "common/math_util.h" #include "video_core/gpu.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_image.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/textures/decoders.h" @@ -60,15 +60,15 @@ public: void UploadTexture(const std::vector<u8>& staging_buffer) override; void DownloadTexture(std::vector<u8>& staging_buffer) override; - void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, - vk::ImageLayout new_layout) { + void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, + VkImageLayout new_layout) { image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, new_stage_mask, new_access, new_layout); } void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, - vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, - vk::ImageLayout new_layout) { + VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, + VkImageLayout new_layout) { image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, new_access, new_layout); } @@ -81,15 +81,15 @@ public: return *image; } - vk::Image GetImageHandle() const { - return image->GetHandle(); + VkImage GetImageHandle() const { + return *image->GetHandle(); } - vk::ImageAspectFlags GetAspectMask() const { + VkImageAspectFlags GetAspectMask() const { return image->GetAspectMask(); } - vk::BufferView GetBufferViewHandle() const { + VkBufferView GetBufferViewHandle() const { return *buffer_view; } @@ -104,9 +104,9 @@ private: void UploadImage(const std::vector<u8>& staging_buffer); - vk::BufferImageCopy GetBufferImageCopy(u32 level) const; + VkBufferImageCopy GetBufferImageCopy(u32 level) const; - vk::ImageSubresourceRange GetImageSubresourceRange() const; + VkImageSubresourceRange GetImageSubresourceRange() const; Core::System& system; const VKDevice& device; @@ -116,11 +116,11 @@ private: VKStagingBufferPool& staging_pool; std::optional<VKImage> image; - UniqueBuffer buffer; - UniqueBufferView buffer_view; + vk::Buffer buffer; + vk::BufferView buffer_view; VKMemoryCommit commit; - vk::Format format; + VkFormat format = VK_FORMAT_UNDEFINED; }; class CachedSurfaceView final : public VideoCommon::ViewBase { @@ -129,16 +129,16 @@ public: const ViewParams& params, bool is_proxy); ~CachedSurfaceView(); - vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); + VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); bool IsSameSurface(const CachedSurfaceView& rhs) const { return &surface == &rhs.surface; } - vk::ImageView GetHandle() { + VkImageView GetHandle() { return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); } @@ -159,24 +159,24 @@ public: return buffer_view; } - vk::Image GetImage() const { + VkImage GetImage() const { return image; } - vk::BufferView GetBufferView() const { + VkBufferView GetBufferView() const { return buffer_view; } - vk::ImageSubresourceRange GetImageSubresourceRange() const { + VkImageSubresourceRange GetImageSubresourceRange() const { return {aspect_mask, base_level, num_levels, base_layer, num_layers}; } - vk::ImageSubresourceLayers GetImageSubresourceLayers() const { + VkImageSubresourceLayers GetImageSubresourceLayers() const { return {surface.GetAspectMask(), base_level, base_layer, num_layers}; } - void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask, - vk::AccessFlags new_access) const { + void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, + VkAccessFlags new_access) const { surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, new_access, new_layout); } @@ -196,9 +196,9 @@ private: // Store a copy of these values to avoid double dereference when reading them const SurfaceParams params; - const vk::Image image; - const vk::BufferView buffer_view; - const vk::ImageAspectFlags aspect_mask; + const VkImage image; + const VkBufferView buffer_view; + const VkImageAspectFlags aspect_mask; const VKDevice& device; CachedSurface& surface; @@ -206,12 +206,12 @@ private: const u32 num_layers; const u32 base_level; const u32 num_levels; - const vk::ImageViewType image_view_type; + const VkImageViewType image_view_type; - vk::ImageView last_image_view; - u32 last_swizzle{}; + VkImageView last_image_view = nullptr; + u32 last_swizzle = 0; - std::unordered_map<u32, UniqueImageView> view_cache; + std::unordered_map<u32, vk::ImageView> view_cache; }; class VKTextureCache final : public TextureCacheBase { diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 0e577b9ff..681ecde98 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -7,10 +7,10 @@ #include "common/assert.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -27,31 +27,32 @@ void VKUpdateDescriptorQueue::Acquire() { entries.clear(); } -void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template, - vk::DescriptorSet set) { +void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, + VkDescriptorSet set) { if (payload.size() + entries.size() >= payload.max_size()) { LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); scheduler.WaitWorker(); payload.clear(); } + // TODO(Rodrigo): Rework to write the payload directly const auto payload_start = payload.data() + payload.size(); for (const auto& entry : entries) { - if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) { + if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { payload.push_back(*image); - } else if (const auto buffer = std::get_if<Buffer>(&entry)) { - payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size); - } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) { + } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { + payload.push_back(*buffer); + } else if (const auto texel = std::get_if<VkBufferView>(&entry)) { payload.push_back(*texel); } else { UNREACHABLE(); } } - scheduler.Record([dev = device.GetLogical(), payload_start, set, - update_template]([[maybe_unused]] auto cmdbuf, auto& dld) { - dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld); - }); + scheduler.Record( + [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) { + logical->UpdateDescriptorSet(set, update_template, payload_start); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 8c825aa29..6ba2c9997 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -9,7 +9,7 @@ #include <boost/container/static_vector.hpp> #include "common/common_types.h" -#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -18,20 +18,19 @@ class VKScheduler; class DescriptorUpdateEntry { public: - explicit DescriptorUpdateEntry() : image{} {} + explicit DescriptorUpdateEntry() {} - DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {} + DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} - DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size) - : buffer{buffer, offset, size} {} + DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} - DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {} + DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} private: union { - vk::DescriptorImageInfo image; - vk::DescriptorBufferInfo buffer; - vk::BufferView texel_buffer; + VkDescriptorImageInfo image; + VkDescriptorBufferInfo buffer; + VkBufferView texel_buffer; }; }; @@ -44,37 +43,30 @@ public: void Acquire(); - void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set); + void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); - void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) { - entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}}); + void AddSampledImage(VkSampler sampler, VkImageView image_view) { + entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); } - void AddImage(vk::ImageView image_view) { - entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}}); + void AddImage(VkImageView image_view) { + entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); } - void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) { - entries.push_back(Buffer{buffer, offset, size}); + void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { + entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); } - void AddTexelBuffer(vk::BufferView texel_buffer) { + void AddTexelBuffer(VkBufferView texel_buffer) { entries.emplace_back(texel_buffer); } - vk::ImageLayout* GetLastImageLayout() { - return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout; + VkImageLayout* GetLastImageLayout() { + return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; } private: - struct Buffer { - const vk::Buffer* buffer{}; - u64 offset{}; - std::size_t size{}; - }; - using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>; - // Old gcc versions don't consider this trivially copyable. - // static_assert(std::is_trivially_copyable_v<Variant>); + using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; const VKDevice& device; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 9b94dfff1..3a52a3a6f 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -61,7 +61,6 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdPipelineBarrier); X(vkCmdPushConstants); X(vkCmdSetBlendConstants); - X(vkCmdSetCheckpointNV); X(vkCmdSetDepthBias); X(vkCmdSetDepthBounds); X(vkCmdSetScissor); @@ -116,7 +115,6 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkGetFenceStatus); X(vkGetImageMemoryRequirements); X(vkGetQueryPoolResults); - X(vkGetQueueCheckpointDataNV); X(vkMapMemory); X(vkQueueSubmit); X(vkResetFences); @@ -409,17 +407,6 @@ DebugCallback Instance::TryCreateDebugCallback( return DebugCallback(messenger, handle, *dld); } -std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const { - if (!dld.vkGetQueueCheckpointDataNV) { - return {}; - } - u32 num; - dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr); - std::vector<VkCheckpointDataNV> checkpoints(num); - dld.vkGetQueueCheckpointDataNV(queue, &num, checkpoints.data()); - return checkpoints; -} - void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); } @@ -469,12 +456,11 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { } Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, - Span<const char*> enabled_extensions, - const VkPhysicalDeviceFeatures2& enabled_features, + Span<const char*> enabled_extensions, const void* next, DeviceDispatch& dld) noexcept { VkDeviceCreateInfo ci; ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - ci.pNext = &enabled_features; + ci.pNext = next; ci.flags = 0; ci.queueCreateInfoCount = queues_ci.size(); ci.pQueueCreateInfos = queues_ci.data(); diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index fb3657819..6fe0294d8 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -197,7 +197,6 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; PFN_vkCmdPushConstants vkCmdPushConstants; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; - PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV; PFN_vkCmdSetDepthBias vkCmdSetDepthBias; PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; PFN_vkCmdSetScissor vkCmdSetScissor; @@ -252,7 +251,6 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkGetFenceStatus vkGetFenceStatus; PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; PFN_vkGetQueryPoolResults vkGetQueryPoolResults; - PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV; PFN_vkMapMemory vkMapMemory; PFN_vkQueueSubmit vkQueueSubmit; PFN_vkResetFences vkResetFences; @@ -567,12 +565,8 @@ public: /// Construct a queue handle. constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} - /// Returns the checkpoint data. - /// @note Returns an empty vector when the function pointer is not present. - std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const; - - void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const { - Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence)); + VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept { + return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence); } VkResult Present(const VkPresentInfoKHR& present_info) const noexcept { @@ -659,8 +653,7 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { public: static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, - Span<const char*> enabled_extensions, - const VkPhysicalDeviceFeatures2& enabled_features, + Span<const char*> enabled_extensions, const void* next, DeviceDispatch& dld) noexcept; Queue GetQueue(u32 family_index) const noexcept; @@ -734,18 +727,11 @@ public: dld->vkResetQueryPoolEXT(handle, query_pool, first, count); } - void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, - void* data, VkDeviceSize stride, VkQueryResultFlags flags) const { - Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, - flags)); - } - - template <typename T> - T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const { - static_assert(std::is_trivially_copyable_v<T>); - T value; - GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags); - return value; + VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, + void* data, VkDeviceSize stride, VkQueryResultFlags flags) const + noexcept { + return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, + flags); } }; @@ -920,10 +906,6 @@ public: dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); } - void SetCheckpointNV(const void* checkpoint_marker) const noexcept { - dld->vkCmdSetCheckpointNV(handle, checkpoint_marker); - } - void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); } diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 2e2711350..6d313963a 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -484,17 +484,17 @@ bool TryInspectAddress(CFGRebuildState& state) { } case BlockCollision::Inside: { // This case is the tricky one: - // We need to Split the block in 2 sepparate blocks + // We need to split the block into 2 separate blocks const u32 end = state.block_info[block_index].end; BlockInfo& new_block = CreateBlockInfo(state, address, end); BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; - new_block.branch = current_block.branch; + new_block.branch = std::move(current_block.branch); BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); const auto branch = std::get_if<SingleBranch>(forward_branch.get()); branch->address = address; branch->ignore = true; - current_block.branch = forward_branch; + current_block.branch = std::move(forward_branch); return true; } default: diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 478394682..4db329fa5 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } - case OpCode::Id::FCMP_R: { + case OpCode::Id::FCMP_RR: + case OpCode::Id::FCMP_RC: { UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); Node op_c = GetRegister(instr.gpr39); Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 0dd7a1196..85ee9aa5e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -352,8 +352,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); } else { const Node image_register = GetRegister(instr.gpr39); - const auto [base_image, buffer, offset] = TrackCbuf( - image_register, global_code, static_cast<s64>(global_code.size())); + const auto result = TrackCbuf(image_register, global_code, + static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); descriptor = registry.ObtainBindlessSampler(buffer, offset); } if (!descriptor) { @@ -497,9 +499,12 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register = GetRegister(reg); - const auto [base_image, buffer, offset] = + const auto result = TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); + const auto it = std::find_if(std::begin(used_images), std::end(used_images), [buffer = buffer, offset = offset](const Image& entry) { diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index b8f63922f..9392f065b 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -3,7 +3,9 @@ // Refer to the license.txt file included. #include <algorithm> +#include <utility> #include <vector> + #include <fmt/format.h> #include "common/alignment.h" @@ -16,6 +18,7 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; @@ -27,29 +30,26 @@ using Tegra::Shader::StoreType; namespace { -Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { - const OperationCode operation_code = [op] { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); - return OperationCode::AtomicIAdd; - } - }(); - return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); +OperationCode GetAtomOperation(AtomicOp op) { + switch (op) { + case AtomicOp::Add: + return OperationCode::AtomicIAdd; + case AtomicOp::Min: + return OperationCode::AtomicIMin; + case AtomicOp::Max: + return OperationCode::AtomicIMax; + case AtomicOp::And: + return OperationCode::AtomicIAnd; + case AtomicOp::Or: + return OperationCode::AtomicIOr; + case AtomicOp::Xor: + return OperationCode::AtomicIXor; + case AtomicOp::Exch: + return OperationCode::AtomicIExchange; + default: + UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); + return OperationCode::AtomicIAdd; + } } bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { @@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), - Immediate(size)); + offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); } Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), - std::move(offset), Immediate(size)); + Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), + Immediate(size)); } Node Sign16Extend(Node value) { Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); + return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); } } // Anonymous namespace @@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { if (IsUnaligned(type)) { const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); + value = InsertUnaligned(gmem, move(value), real_address, mask, size); } bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::RED: { + UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); + UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add); + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. + break; + } + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = GetRegister(instr.gpr0); + bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); + break; + } case OpCode::Id::ATOM: { UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || instr.atom.operation == AtomicOp::Dec || instr.atom.operation == AtomicOp::SafeAdd, "operation={}", static_cast<int>(instr.atom.operation.Value())); UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64, + instr.atom.type == GlobalAtomicType::U64 || + instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || + instr.atom.type == GlobalAtomicType::F32_FTZ_RN, "type={}", static_cast<int>(instr.atom.type.Value())); const auto [real_address, base_address, descriptor] = @@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; + instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20)); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, + SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, + GetRegister(instr.gpr20))); break; } case OpCode::Id::ATOMS: { @@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; const s32 offset = instr.atoms.GetImmediateOffset(); Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); - Node value = - GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, - GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); - SetRegister(bb, instr.gpr0, std::move(value)); + address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); + SetRegister(bb, instr.gpr0, + SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, + GetSharedMemory(move(address)), GetRegister(instr.gpr20))); break; } case OpCode::Id::AL2P: { @@ -465,7 +479,7 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); const GlobalMemoryBase descriptor{index, offset}; - const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); + const auto& entry = used_global_memory.try_emplace(descriptor).first; auto& usage = entry->second; usage.is_written |= is_write; usage.is_read |= is_read; diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 3b391d3e6..d4ffa8014 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -23,7 +23,6 @@ Node IsFull(Node shift) { } Node Shift(OperationCode opcode, Node value, Node shift) { - Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32)); Node shifted = Operation(opcode, move(value), shift); return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 6c4a1358b..e68f1d305 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -139,7 +139,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); - const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare}; + const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare, false}; const Sampler& sampler = *GetSampler(instr.sampler, info); Node4 values; @@ -171,13 +171,12 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto coord_count = GetCoordCount(texture_type); Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) - : GetSampler(instr.sampler, {{texture_type, is_array, false}}); + is_bindless + ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false, false}}); Node4 values; if (sampler == nullptr) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } + std::generate(values.begin(), values.end(), [] { return Immediate(0); }); WriteTexInstructionFloat(bb, instr, values); break; } @@ -269,7 +268,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { "NDV is not implemented"); auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; Node index_var{}; const Sampler* sampler = is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); @@ -593,8 +591,9 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, ++parameter_register; } - const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( - texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); + const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, + lod_bias_enabled, 4, 5); + const auto coord_count = std::get<0>(coord_counts); // If enabled arrays index is always stored in the gpr8 field const u64 array_register = instr.gpr8.Value(); // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used @@ -632,8 +631,10 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, const bool lod_bias_enabled = (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( - texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); + const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, + lod_bias_enabled, 4, 4); + const auto coord_count = std::get<0>(coord_counts); + // If enabled arrays index is always stored in the gpr8 field const u64 array_register = instr.gpr8.Value(); // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 5fcc9da60..3eee961f5 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -178,6 +178,20 @@ enum class OperationCode { AtomicIOr, /// (memory, int) -> int AtomicIXor, /// (memory, int) -> int + ReduceUAdd, /// (memory, uint) -> void + ReduceUMin, /// (memory, uint) -> void + ReduceUMax, /// (memory, uint) -> void + ReduceUAnd, /// (memory, uint) -> void + ReduceUOr, /// (memory, uint) -> void + ReduceUXor, /// (memory, uint) -> void + + ReduceIAdd, /// (memory, int) -> void + ReduceIMin, /// (memory, int) -> void + ReduceIMax, /// (memory, int) -> void + ReduceIAnd, /// (memory, int) -> void + ReduceIOr, /// (memory, int) -> void + ReduceIXor, /// (memory, int) -> void + Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 8852c8a1b..822674926 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -56,8 +56,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsed(offset); + used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); return MakeNode<CbufNode>(index, Immediate(offset)); } @@ -66,8 +65,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsedIndirect(); + used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); Node final_offset = [&] { // Attempt to inline constant buffer without a variable offset. This is done to allow @@ -166,6 +164,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe std::move(value), Immediate(16)); value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, std::move(value), Immediate(16)); + return value; case Register::Size::Word: // Default - do nothing return value; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 10739b37d..513e9bf49 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -27,8 +27,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { const auto& conditional_code = conditional->GetCode(); - auto [found, internal_cursor] = FindOperation( + auto result = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); + auto& found = result.first; if (found) { return {std::move(found), cursor}; } @@ -75,12 +76,13 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons s64 cursor) { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Constant buffer found, test if it's an immediate - const auto offset = cbuf->GetOffset(); + const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; - } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { + } + if (const auto operation = std::get_if<OperationNode>(&*offset)) { const u32 bound_buffer = registry.GetBoundBuffer(); if (bound_buffer != cbuf->GetIndex()) { return {}; @@ -93,12 +95,12 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); const auto& gpu_driver = registry.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); - const Node op = + Node op = Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); const Node cv_node = GetCustomVariable(bindless_cv); Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); - const std::size_t amend_index = DeclareAmend(amend_op); + const std::size_t amend_index = DeclareAmend(std::move(amend_op)); AmendNodeCv(amend_index, code[cursor]); // TODO Implement Bindless Index custom variable auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), @@ -141,7 +143,7 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Constant buffer found, test if it's an immediate - const auto offset = cbuf->GetOffset(); + const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { return {tracked, cbuf->GetIndex(), immediate->GetValue()}; } @@ -186,8 +188,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register // that it uses as operand - const auto [found, found_cursor] = - TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto& found = result.first; if (!found) { return {}; } diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index e151c26c4..25d2ee2e8 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -196,9 +196,9 @@ std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb ComponentType alpha_component) noexcept { const auto format_index = static_cast<std::size_t>(format); const auto red_index = static_cast<std::size_t>(red_component); - const auto green_index = static_cast<std::size_t>(red_component); - const auto blue_index = static_cast<std::size_t>(red_component); - const auto alpha_index = static_cast<std::size_t>(red_component); + const auto green_index = static_cast<std::size_t>(green_component); + const auto blue_index = static_cast<std::size_t>(blue_component); + const auto alpha_index = static_cast<std::size_t>(alpha_component); const std::size_t srgb_index = is_srgb ? 1 : 0; return format_index * PerFormat + diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7af0e792c..715f39d0d 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -248,8 +248,14 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, // Use an extra temporal buffer auto& tmp_buffer = staging_cache.GetBuffer(1); + // Special case for 3D Texture Segments + const bool must_read_current_data = + params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D; tmp_buffer.resize(guest_memory_size); host_ptr = tmp_buffer.data(); + if (must_read_current_data) { + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); + } if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index a39a8661b..c5ab21f56 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -72,9 +72,9 @@ public: return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { + bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; - return (gpu_addr <= other_start && other_end <= gpu_addr_end); + return gpu_addr <= other_start && other_end <= gpu_addr_end; } // Use only when recycling a surface diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6f3ef45be..0de499946 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -167,7 +167,6 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { const auto& regs = system.GPU().Maxwell3D().regs; - regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type; SurfaceParams params; params.is_tiled = regs.zeta.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp index 57a1f5803..6b5f5984b 100644 --- a/src/video_core/texture_cache/surface_view.cpp +++ b/src/video_core/texture_cache/surface_view.cpp @@ -20,4 +20,8 @@ bool ViewParams::operator==(const ViewParams& rhs) const { std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); } +bool ViewParams::operator!=(const ViewParams& rhs) const { + return !operator==(rhs); +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index b17fd11a9..90a8bb0ae 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -21,6 +21,7 @@ struct ViewParams { std::size_t Hash() const; bool operator==(const ViewParams& rhs) const; + bool operator!=(const ViewParams& rhs) const; bool IsLayered() const { switch (target) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cfc7fe6e9..69ca08fd1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -509,7 +509,9 @@ private: } const auto& final_params = new_surface->GetSurfaceParams(); if (cr_params.type != final_params.type) { - BufferCopy(current_surface, new_surface); + if (Settings::values.use_accurate_gpu_emulation) { + BufferCopy(current_surface, new_surface); + } } else { std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); for (auto& brick : bricks) { @@ -612,10 +614,10 @@ private: * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of * the HLE methods. * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - * @param cache_addr The starting address of the new surface on physical memory. + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. + * @param cpu_addr The starting address of the new surface on physical memory. */ std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, const SurfaceParams& params, @@ -645,7 +647,8 @@ private: break; } const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); - const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); + const auto offsets = params.GetBlockOffsetXYZ(offset); + const auto z = std::get<2>(offsets); modified |= surface->IsModified(); const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, 1); |
