diff options
Diffstat (limited to 'src/video_core')
51 files changed, 2763 insertions, 1865 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 21c46a567..3cd896a0f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -98,6 +98,8 @@ add_library(video_core STATIC sampler_cache.cpp sampler_cache.h shader_cache.h + shader_notify.cpp + shader_notify.h shader/decode/arithmetic.cpp shader/decode/arithmetic_immediate.cpp shader/decode/bfe.cpp @@ -128,6 +130,8 @@ add_library(video_core STATIC shader/decode/other.cpp shader/ast.cpp shader/ast.h + shader/async_shaders.cpp + shader/async_shaders.h shader/compiler_settings.cpp shader/compiler_settings.h shader/control_flow.cpp diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 01d7df405..a2d3d7823 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -14,50 +14,45 @@ namespace Tegra::Engines { +using namespace Texture; + MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) : system{system}, memory_manager{memory_manager} {} void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { - ASSERT_MSG(method < Regs::NUM_REGS, - "Invalid MaxwellDMA register, increase the size of the Regs structure"); + ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); regs.reg_array[method] = method_argument; -#define MAXWELLDMA_REG_INDEX(field_name) \ - (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) - - switch (method) { - case MAXWELLDMA_REG_INDEX(exec): { - HandleCopy(); - break; - } + if (method == offsetof(Regs, launch_dma) / sizeof(u32)) { + Launch(); } - -#undef MAXWELLDMA_REG_INDEX } void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (std::size_t i = 0; i < amount; i++) { + for (size_t i = 0; i < amount; ++i) { CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); } } -void MaxwellDMA::HandleCopy() { - LOG_TRACE(HW_GPU, "Requested a DMA copy"); - - const GPUVAddr source = regs.src_address.Address(); - const GPUVAddr dest = regs.dst_address.Address(); +void MaxwellDMA::Launch() { + LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in), + static_cast<GPUVAddr>(regs.offset_out)); // TODO(Subv): Perform more research and implement all features of this engine. - ASSERT(regs.exec.enable_swizzle == 0); - ASSERT(regs.exec.query_mode == Regs::QueryMode::None); - ASSERT(regs.exec.query_intr == Regs::QueryIntr::None); - ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2); - ASSERT(regs.dst_params.pos_x == 0); - ASSERT(regs.dst_params.pos_y == 0); - - if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) { + const LaunchDMA& launch = regs.launch_dma; + ASSERT(launch.remap_enable == 0); + ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); + ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); + ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); + ASSERT(regs.dst_params.origin.x == 0); + ASSERT(regs.dst_params.origin.y == 0); + + const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; + const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; + + if (!is_src_pitch && !is_dst_pitch) { // If both the source and the destination are in block layout, assert. UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented"); return; @@ -66,144 +61,161 @@ void MaxwellDMA::HandleCopy() { // All copies here update the main memory, so mark all rasterizer states as invalid. system.GPU().Maxwell3D().OnMemoryWrite(); - if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { - // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D - // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, - // y_count). - if (!regs.exec.enable_2d) { - memory_manager.CopyBlock(dest, source, regs.x_count); - return; - } + if (is_src_pitch && is_dst_pitch) { + CopyPitchToPitch(); + } else { + ASSERT(launch.multi_line_enable == 1); - // If both the source and the destination are in linear layout, perform a line-by-line - // copy. We're going to take a subrect of size (x_count, y_count) from the source - // rectangle. There is no need to manually flush/invalidate the regions because - // CopyBlock does that for us. - for (u32 line = 0; line < regs.y_count; ++line) { - const GPUVAddr source_line = source + line * regs.src_pitch; - const GPUVAddr dest_line = dest + line * regs.dst_pitch; - memory_manager.CopyBlock(dest_line, source_line, regs.x_count); + if (!is_src_pitch && is_dst_pitch) { + CopyBlockLinearToPitch(); + } else { + CopyPitchToBlockLinear(); } - return; } +} - ASSERT(regs.exec.enable_2d == 1); - - if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { - - ASSERT(regs.src_params.BlockDepth() == 0); - // Optimized path for micro copies. - if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) { - const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; - const std::size_t src_size = Texture::GetGOBSize(); - const std::size_t dst_size = regs.dst_pitch * regs.y_count; - u32 pos_x = regs.src_params.pos_x; - u32 pos_y = regs.src_params.pos_y; - const u64 offset = - Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y, - regs.src_params.BlockDepth(), bytes_per_pixel); - const u32 x_in_gob = 64 / bytes_per_pixel; - pos_x = pos_x % x_in_gob; - pos_y = pos_y % 8; - - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } - - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } - - if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size); - memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - } else { - memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); - } - - Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, - regs.src_params.size_x, bytes_per_pixel, read_buffer.data(), - write_buffer.data(), regs.src_params.BlockHeight(), pos_x, - pos_y); - - memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); - - return; - } - // If the input is tiled and the output is linear, deswizzle the input and copy it over. - const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; - const std::size_t src_size = Texture::CalculateSize( - true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, - regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); - - const std::size_t src_layer_size = Texture::CalculateSize( - true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1, - regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); - - const std::size_t dst_size = regs.dst_pitch * regs.y_count; +void MaxwellDMA::CopyPitchToPitch() { + // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D + // buffer of length `line_length_in`. + // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). + if (!regs.launch_dma.multi_line_enable) { + memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in); + return; + } - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } + // Perform a line-by-line copy. + // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. + // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. + for (u32 line = 0; line < regs.line_count; ++line) { + const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in; + const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out; + memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); + } +} - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } +void MaxwellDMA::CopyBlockLinearToPitch() { + ASSERT(regs.src_params.block_size.depth == 0); - if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(source, read_buffer.data(), src_size); - memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - } else { - memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); - } + // Optimized path for micro copies. + const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; + if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { + FastCopyBlockLinearToPitch(); + return; + } - Texture::UnswizzleSubrect( - regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, - read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(), - regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y); + // Deswizzle the input and copy it over. + const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; + const Parameters& src_params = regs.src_params; + const u32 width = src_params.width; + const u32 height = src_params.height; + const u32 depth = src_params.depth; + const u32 block_height = src_params.block_size.height; + const u32 block_depth = src_params.block_size.depth; + const size_t src_size = + CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); + const size_t src_layer_size = + CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); + + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); + if (Settings::IsGPULevelExtreme()) { + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); } else { - ASSERT(regs.dst_params.BlockDepth() == 0); + memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); + } - const u32 bytes_per_pixel = regs.src_pitch / regs.x_count; + UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, + read_buffer.data() + src_layer_size * src_params.layer, write_buffer.data(), + block_height, src_params.origin.x, src_params.origin.y); - const std::size_t dst_size = Texture::CalculateSize( - true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, - regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); + memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); +} - const std::size_t dst_layer_size = Texture::CalculateSize( - true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, - regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); +void MaxwellDMA::CopyPitchToBlockLinear() { + const auto& dst_params = regs.dst_params; + const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; + const u32 width = dst_params.width; + const u32 height = dst_params.height; + const u32 depth = dst_params.depth; + const u32 block_height = dst_params.block_size.height; + const u32 block_depth = dst_params.block_size.depth; + const size_t dst_size = + CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); + const size_t dst_layer_size = + CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); + + const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; + + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - const std::size_t src_size = regs.src_pitch * regs.y_count; + if (Settings::IsGPULevelExtreme()) { + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); + } else { + memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); + } - if (read_buffer.size() < src_size) { - read_buffer.resize(src_size); - } + // If the input is linear and the output is tiled, swizzle the input and copy it over. + if (regs.dst_params.block_size.depth > 0) { + ASSERT(dst_params.layer == 0); + SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height, + bytes_per_pixel, block_height, block_depth, dst_params.origin.x, + dst_params.origin.y, write_buffer.data(), read_buffer.data()); + } else { + SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel, + write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(), + block_height, dst_params.origin.x, dst_params.origin.y); + } - if (write_buffer.size() < dst_size) { - write_buffer.resize(dst_size); - } + memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); +} - if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(source, read_buffer.data(), src_size); - memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - } else { - memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); - } +void MaxwellDMA::FastCopyBlockLinearToPitch() { + const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; + const size_t src_size = GOB_SIZE; + const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; + u32 pos_x = regs.src_params.origin.x; + u32 pos_y = regs.src_params.origin.y; + const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y, + regs.src_params.block_size.height, bytes_per_pixel); + const u32 x_in_gob = 64 / bytes_per_pixel; + pos_x = pos_x % x_in_gob; + pos_y = pos_y % 8; + + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } - // If the input is linear and the output is tiled, swizzle the input and copy it over. - Texture::SwizzleSubrect( - regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel, - write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(), - regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y); + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); + if (Settings::IsGPULevelExtreme()) { + memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); + } else { + memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size); + memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); } + + UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, + bytes_per_pixel, read_buffer.data(), write_buffer.data(), + regs.src_params.block_size.height, pos_x, pos_y); + + memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 502dd8509..50f445efc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -24,160 +24,190 @@ class MemoryManager; namespace Tegra::Engines { /** - * This Engine is known as GK104_Copy. Documentation can be found in: + * This engine is known as gk104_copy. Documentation can be found in: + * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml */ class MaxwellDMA final : public EngineInterface { public: - explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); - ~MaxwellDMA() = default; - - /// Write the value to the register identified by method. - void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; - - /// Write multiple values to the register identified by method. - void CallMultiMethod(u32 method, const u32* base_start, u32 amount, - u32 methods_pending) override; + struct PackedGPUVAddr { + u32 upper; + u32 lower; + + constexpr operator GPUVAddr() const noexcept { + return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower; + } + }; + + union BlockSize { + BitField<0, 4, u32> width; + BitField<4, 4, u32> height; + BitField<8, 4, u32> depth; + BitField<12, 4, u32> gob_height; + }; + static_assert(sizeof(BlockSize) == 4); + + union Origin { + BitField<0, 16, u32> x; + BitField<16, 16, u32> y; + }; + static_assert(sizeof(Origin) == 4); + + struct Parameters { + BlockSize block_size; + u32 width; + u32 height; + u32 depth; + u32 layer; + Origin origin; + }; + static_assert(sizeof(Parameters) == 24); + + struct Semaphore { + PackedGPUVAddr address; + u32 payload; + }; + static_assert(sizeof(Semaphore) == 12); + + struct RenderEnable { + enum class Mode : u32 { + FALSE = 0, + TRUE = 1, + CONDITIONAL = 2, + RENDER_IF_EQUAL = 3, + RENDER_IF_NOT_EQUAL = 4, + }; - struct Regs { - static constexpr std::size_t NUM_REGS = 0x1D6; + PackedGPUVAddr address; + BitField<0, 3, Mode> mode; + }; + static_assert(sizeof(RenderEnable) == 12); + + enum class PhysModeTarget : u32 { + LOCAL_FB = 0, + COHERENT_SYSMEM = 1, + NONCOHERENT_SYSMEM = 2, + }; + using PhysMode = BitField<0, 2, PhysModeTarget>; + + union LaunchDMA { + enum class DataTransferType : u32 { + NONE = 0, + PIPELINED = 1, + NON_PIPELINED = 2, + }; - struct Parameters { - union { - BitField<0, 4, u32> block_depth; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_width; - }; - u32 size_x; - u32 size_y; - u32 size_z; - u32 pos_z; - union { - BitField<0, 16, u32> pos_x; - BitField<16, 16, u32> pos_y; - }; + enum class SemaphoreType : u32 { + NONE = 0, + RELEASE_ONE_WORD_SEMAPHORE = 1, + RELEASE_FOUR_WORD_SEMAPHORE = 2, + }; - u32 BlockHeight() const { - return block_height.Value(); - } + enum class InterruptType : u32 { + NONE = 0, + BLOCKING = 1, + NON_BLOCKING = 2, + }; - u32 BlockDepth() const { - return block_depth.Value(); - } + enum class MemoryLayout : u32 { + BLOCKLINEAR = 0, + PITCH = 1, }; - static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); + enum class Type : u32 { + VIRTUAL = 0, + PHYSICAL = 1, + }; - enum class ComponentMode : u32 { - Src0 = 0, - Src1 = 1, - Src2 = 2, - Src3 = 3, - Const0 = 4, - Const1 = 5, - Zero = 6, + enum class SemaphoreReduction : u32 { + IMIN = 0, + IMAX = 1, + IXOR = 2, + IAND = 3, + IOR = 4, + IADD = 5, + INC = 6, + DEC = 7, + FADD = 0xA, }; - enum class CopyMode : u32 { - None = 0, - Unk1 = 1, - Unk2 = 2, + enum class SemaphoreReductionSign : u32 { + SIGNED = 0, + UNSIGNED = 1, }; - enum class QueryMode : u32 { - None = 0, - Short = 1, - Long = 2, + enum class BypassL2 : u32 { + USE_PTE_SETTING = 0, + FORCE_VOLATILE = 1, }; - enum class QueryIntr : u32 { - None = 0, - Block = 1, - NonBlock = 2, + BitField<0, 2, DataTransferType> data_transfer_type; + BitField<2, 1, u32> flush_enable; + BitField<3, 2, SemaphoreType> semaphore_type; + BitField<5, 2, InterruptType> interrupt_type; + BitField<7, 1, MemoryLayout> src_memory_layout; + BitField<8, 1, MemoryLayout> dst_memory_layout; + BitField<9, 1, u32> multi_line_enable; + BitField<10, 1, u32> remap_enable; + BitField<11, 1, u32> rmwdisable; + BitField<12, 1, Type> src_type; + BitField<13, 1, Type> dst_type; + BitField<14, 4, SemaphoreReduction> semaphore_reduction; + BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign; + BitField<19, 1, u32> reduction_enable; + BitField<20, 1, BypassL2> bypass_l2; + }; + static_assert(sizeof(LaunchDMA) == 4); + + struct RemapConst { + enum Swizzle : u32 { + SRC_X = 0, + SRC_Y = 1, + SRC_Z = 2, + SRC_W = 3, + CONST_A = 4, + CONST_B = 5, + NO_WRITE = 6, }; - union { - struct { - INSERT_UNION_PADDING_WORDS(0xC0); - - struct { - union { - BitField<0, 2, CopyMode> copy_mode; - BitField<2, 1, u32> flush; - - BitField<3, 2, QueryMode> query_mode; - BitField<5, 2, QueryIntr> query_intr; - - BitField<7, 1, u32> is_src_linear; - BitField<8, 1, u32> is_dst_linear; - - BitField<9, 1, u32> enable_2d; - BitField<10, 1, u32> enable_swizzle; - }; - } exec; - - INSERT_UNION_PADDING_WORDS(0x3F); - - struct { - u32 address_high; - u32 address_low; - - GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); - } - } src_address; - - struct { - u32 address_high; - u32 address_low; - - GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); - } - } dst_address; - - u32 src_pitch; - u32 dst_pitch; - u32 x_count; - u32 y_count; - - INSERT_UNION_PADDING_WORDS(0xB8); - - u32 const0; - u32 const1; - union { - BitField<0, 4, ComponentMode> component0; - BitField<4, 4, ComponentMode> component1; - BitField<8, 4, ComponentMode> component2; - BitField<12, 4, ComponentMode> component3; - BitField<16, 2, u32> component_size; - BitField<20, 3, u32> src_num_components; - BitField<24, 3, u32> dst_num_components; - - u32 SrcBytePerPixel() const { - return src_num_components.Value() * component_size.Value(); - } - u32 DstBytePerPixel() const { - return dst_num_components.Value() * component_size.Value(); - } - } swizzle_config; + PackedGPUVAddr address; - Parameters dst_params; + union { + BitField<0, 3, Swizzle> dst_x; + BitField<4, 3, Swizzle> dst_y; + BitField<8, 3, Swizzle> dst_z; + BitField<12, 3, Swizzle> dst_w; + BitField<16, 2, u32> component_size_minus_one; + BitField<20, 2, u32> num_src_components_minus_one; + BitField<24, 2, u32> num_dst_components_minus_one; + }; + }; + static_assert(sizeof(RemapConst) == 12); - INSERT_UNION_PADDING_WORDS(1); + explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); + ~MaxwellDMA() = default; - Parameters src_params; + /// Write the value to the register identified by method. + void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; - INSERT_UNION_PADDING_WORDS(0x13); - }; - std::array<u32, NUM_REGS> reg_array; - }; - } regs{}; + /// Write multiple values to the register identified by method. + void CallMultiMethod(u32 method, const u32* base_start, u32 amount, + u32 methods_pending) override; private: + /// Performs the copy from the source buffer to the destination buffer as configured in the + /// registers. + void Launch(); + + void CopyPitchToPitch(); + + void CopyBlockLinearToPitch(); + + void CopyPitchToBlockLinear(); + + void FastCopyBlockLinearToPitch(); + Core::System& system; MemoryManager& memory_manager; @@ -185,28 +215,58 @@ private: std::vector<u8> read_buffer; std::vector<u8> write_buffer; - /// Performs the copy from the source buffer to the destination buffer as configured in the - /// registers. - void HandleCopy(); -}; + static constexpr std::size_t NUM_REGS = 0x800; + struct Regs { + union { + struct { + u32 reserved[0x40]; + u32 nop; + u32 reserved01[0xf]; + u32 pm_trigger; + u32 reserved02[0x3f]; + Semaphore semaphore; + u32 reserved03[0x2]; + RenderEnable render_enable; + PhysMode src_phys_mode; + PhysMode dst_phys_mode; + u32 reserved04[0x26]; + LaunchDMA launch_dma; + u32 reserved05[0x3f]; + PackedGPUVAddr offset_in; + PackedGPUVAddr offset_out; + u32 pitch_in; + u32 pitch_out; + u32 line_length_in; + u32 line_count; + u32 reserved06[0xb8]; + RemapConst remap_const; + Parameters dst_params; + u32 reserved07[0x1]; + Parameters src_params; + u32 reserved08[0x275]; + u32 pm_trigger_end; + u32 reserved09[0x3ba]; + }; + std::array<u32, NUM_REGS> reg_array; + }; + } regs{}; #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(exec, 0xC0); -ASSERT_REG_POSITION(src_address, 0x100); -ASSERT_REG_POSITION(dst_address, 0x102); -ASSERT_REG_POSITION(src_pitch, 0x104); -ASSERT_REG_POSITION(dst_pitch, 0x105); -ASSERT_REG_POSITION(x_count, 0x106); -ASSERT_REG_POSITION(y_count, 0x107); -ASSERT_REG_POSITION(const0, 0x1C0); -ASSERT_REG_POSITION(const1, 0x1C1); -ASSERT_REG_POSITION(swizzle_config, 0x1C2); -ASSERT_REG_POSITION(dst_params, 0x1C3); -ASSERT_REG_POSITION(src_params, 0x1CA); + ASSERT_REG_POSITION(launch_dma, 0xC0); + ASSERT_REG_POSITION(offset_in, 0x100); + ASSERT_REG_POSITION(offset_out, 0x102); + ASSERT_REG_POSITION(pitch_in, 0x104); + ASSERT_REG_POSITION(pitch_out, 0x105); + ASSERT_REG_POSITION(line_length_in, 0x106); + ASSERT_REG_POSITION(line_count, 0x107); + ASSERT_REG_POSITION(remap_const, 0x1C0); + ASSERT_REG_POSITION(dst_params, 0x1C3); + ASSERT_REG_POSITION(src_params, 0x1CA); #undef ASSERT_REG_POSITION +}; } // namespace Tegra::Engines diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 758bfe148..8e19c3373 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -20,6 +20,7 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" +#include "video_core/shader_notify.h" #include "video_core/video_core.h" namespace Tegra { @@ -36,6 +37,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); + shader_notify = std::make_unique<VideoCore::ShaderNotify>(); } GPU::~GPU() = default; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index d646c441c..19a34c402 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -33,6 +33,7 @@ class System; namespace VideoCore { class RendererBase; +class ShaderNotify; } // namespace VideoCore namespace Tegra { @@ -215,6 +216,14 @@ public: return *renderer; } + VideoCore::ShaderNotify& ShaderNotify() { + return *shader_notify; + } + + const VideoCore::ShaderNotify& ShaderNotify() const { + return *shader_notify; + } + // Waits for the GPU to finish working virtual void WaitIdle() const = 0; @@ -355,6 +364,8 @@ private: std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; /// Inline memory engine std::unique_ptr<Engines::KeplerMemory> kepler_memory; + /// Shader build notifier + std::unique_ptr<VideoCore::ShaderNotify> shader_notify; std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 4d00b84b0..31ee3440a 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -103,8 +103,9 @@ public: virtual ~CachedMacro() = default; /** * Executes the macro code with the specified input parameters. - * @param code The macro byte code to execute + * * @param parameters The parameters of the macro + * @param method The method to execute */ virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0; }; diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 410f99018..0c9ff59a4 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -12,13 +12,11 @@ namespace Tegra { namespace { // HLE'd functions -static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, - const std::vector<u32>& parameters) { +void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); maxwell3d.regs.draw.topology.Assign( - static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & - ~(0x3ffffff << 26))); + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff)); maxwell3d.regs.vb_base_instance = parameters[5]; maxwell3d.mme_draw.instance_count = instance_count; maxwell3d.regs.vb_element_base = parameters[3]; @@ -33,8 +31,7 @@ static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, - const std::vector<u32>& parameters) { +void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); maxwell3d.regs.vertex_buffer.first = parameters[3]; @@ -52,8 +49,7 @@ static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, - const std::vector<u32>& parameters) { +void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); const u32 element_base = parameters[4]; const u32 base_instance = parameters[5]; @@ -81,12 +77,12 @@ static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, maxwell3d.CallMethodFromMME(0x8e5, 0x0); maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -} // namespace +} // Anonymous namespace constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ - std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), - std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), - std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7), + {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, + {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, + {0x0217920100488FF7, &HLE_0217920100488FF7}, }}; HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index eb5158407..4489abf61 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) { return type; } -std::string GlobalMemoryName(const GlobalMemoryBase& base) { - return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); -} - class ARBDecompiler final { public: explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, @@ -199,6 +195,8 @@ public: } private: + void DefineGlobalMemory(); + void DeclareHeader(); void DeclareVertex(); void DeclareGeometry(); @@ -228,6 +226,7 @@ private: std::pair<std::string, std::size_t> BuildCoords(Operation); std::string BuildAoffi(Operation); + std::string GlobalMemoryPointer(const GmemNode& gmem); void Exit(); std::string Assign(Operation); @@ -378,10 +377,8 @@ private: std::string address; std::string_view opname; if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); - opname = "ATOMB"; + address = GlobalMemoryPointer(*gmem); + opname = "ATOM"; } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); opname = "ATOMS"; @@ -456,9 +453,13 @@ private: shader_source += '\n'; } - std::string AllocTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}.x", num_temporaries++); + std::string AllocLongVectorTemporary() { + max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); + return fmt::format("L{}", num_long_temporaries++); + } + + std::string AllocLongTemporary() { + return fmt::format("{}.x", AllocLongVectorTemporary()); } std::string AllocVectorTemporary() { @@ -466,8 +467,13 @@ private: return fmt::format("T{}", num_temporaries++); } + std::string AllocTemporary() { + return fmt::format("{}.x", AllocVectorTemporary()); + } + void ResetTemporaries() noexcept { num_temporaries = 0; + num_long_temporaries = 0; } const Device& device; @@ -478,6 +484,11 @@ private: std::size_t num_temporaries = 0; std::size_t max_temporaries = 0; + std::size_t num_long_temporaries = 0; + std::size_t max_long_temporaries = 0; + + std::map<GlobalMemoryBase, u32> global_memory_names; + std::string shader_source; static constexpr std::string_view ADD_F32 = "ADD.F32"; @@ -784,6 +795,8 @@ private: ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier) : device{device}, ir{ir}, registry{registry}, stage{stage} { + DefineGlobalMemory(); + AddLine("TEMP RC;"); AddLine("TEMP FSWZA[4];"); AddLine("TEMP FSWZB[4];"); @@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) { } } +void ARBDecompiler::DefineGlobalMemory() { + u32 binding = 0; + for (const auto& pair : ir.GetGlobalMemory()) { + const GlobalMemoryBase base = pair.first; + global_memory_names.emplace(base, binding); + ++binding; + } +} + void ARBDecompiler::DeclareHeader() { AddLine("!!NV{}5.0", HeaderStageName(stage)); // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D AddLine("OPTION NV_internal;"); AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_storage_buffer;"); AddLine("OPTION NV_shader_thread_group;"); if (ir.UsesWarps() && device.HasWarpIntrinsics()) { AddLine("OPTION NV_shader_thread_shuffle;"); @@ -951,11 +972,10 @@ void ARBDecompiler::DeclareLocalMemory() { } void ARBDecompiler::DeclareGlobalMemory() { - u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& pair : ir.GetGlobalMemory()) { - const auto& base = pair.first; - AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); - ++binding; + const std::size_t num_entries = ir.GetGlobalMemory().size(); + if (num_entries > 0) { + const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; + AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); } } @@ -977,6 +997,9 @@ void ARBDecompiler::DeclareTemporaries() { for (std::size_t i = 0; i < max_temporaries; ++i) { AddLine("TEMP T{};", i); } + for (std::size_t i = 0; i < max_long_temporaries; ++i) { + AddLine("LONG TEMP L{};", i); + } } void ARBDecompiler::DeclarePredicates() { @@ -1339,10 +1362,7 @@ std::string ARBDecompiler::Visit(const Node& node) { if (const auto gmem = std::get_if<GmemNode>(&*node)) { std::string temporary = AllocTemporary(); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), - temporary); + AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); return temporary; } @@ -1419,6 +1439,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { return fmt::format(", offset({})", temporary); } +std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { + const u32 binding = global_memory_names.at(gmem.GetDescriptor()); + const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; + + const std::string pointer = AllocLongVectorTemporary(); + std::string temporary = AllocTemporary(); + + const u32 local_index = binding / 2; + AddLine("PK64.U {}, c[{}];", pointer, local_index); + AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), + Visit(gmem.GetBaseAddress())); + AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); + AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); + return fmt::format("{}.x", pointer); +} + void ARBDecompiler::Exit() { if (stage != ShaderType::Fragment) { AddLine("RET;"); @@ -1515,11 +1551,7 @@ std::string ARBDecompiler::Assign(Operation operation) { ResetTemporaries(); return {}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const std::string temporary = AllocTemporary(); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), - temporary); + AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); ResetTemporaries(); return {}; } else { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e461e4c70..e866d8f2f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); - if (device.HasVertexBufferUnifiedMemory()) { + if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c1f20f0ab..630acb73b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -233,6 +233,8 @@ Device::Device() GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); + LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e1d811966..94d38d7d1 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -104,6 +104,10 @@ public: return use_assembly_shaders; } + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -127,6 +131,7 @@ private: bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; bool use_assembly_shaders{}; + bool use_asynchronous_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e960a0ef1..03e82c599 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -139,6 +139,18 @@ void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } +void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { + if (num_entries == 0) { + return; + } + if (num_entries % 2 == 1) { + pointers[num_entries] = 0; + } + const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); + glProgramLocalParametersI4uivNV(target, 0, num_vectors, + reinterpret_cast<const GLuint*>(pointers)); +} + } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, @@ -149,7 +161,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, - screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { + screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, + async_shaders{emu_window} { CheckExtensions(); unified_uniform_buffer.Create(); @@ -162,6 +175,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind nullptr, 0); } } + + if (device.UseAsynchronousShaders()) { + // Max worker threads we should allow + constexpr auto MAX_THREADS = 2u; + // Amount of threads we should reserve for other parts of yuzu + constexpr auto RESERVED_THREADS = 6u; + // Get the amount of threads we can use(this can return zero) + const auto cpu_thread_count = + std::max(RESERVED_THREADS, std::thread::hardware_concurrency()); + // Deduce how many "extra" threads we have to use. + const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1u, max_threads_unused); + // Don't use more than MAX_THREADS + const auto worker_count = std::min(max_worker_count, MAX_THREADS); + async_shaders.AllocateWorkers(worker_count); + } } RasterizerOpenGL::~RasterizerOpenGL() { @@ -306,7 +336,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); - std::size_t num_ssbos = 0; u32 clip_distances = 0; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -329,31 +358,15 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } // Currently this stages are not supported in the OpenGL backend. - // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl) { - continue; - } else if (program == Maxwell::ShaderProgram::TesselationEval) { + // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL + if (program == Maxwell::ShaderProgram::TesselationControl || + program == Maxwell::ShaderProgram::TesselationEval) { continue; } - Shader* const shader = shader_cache.GetStageProgram(program); - - if (device.UseAssemblyShaders()) { - // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this - // all stages share the same bindings. - const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); - ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); - num_ssbos += num_stage_ssbos; - } + Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - // Stage indices are 0 - 5 - const std::size_t stage = index == 0 ? 0 : index - 1; - SetupDrawConstBuffers(stage, shader); - SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader); - SetupDrawImages(stage, shader); - - const GLuint program_handle = shader->GetHandle(); + const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: @@ -370,6 +383,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { shader_config.enable.Value(), shader_config.offset); } + // Stage indices are 0 - 5 + const std::size_t stage = index == 0 ? 0 : index - 1; + SetupDrawConstBuffers(stage, shader); + SetupDrawGlobalMemory(stage, shader); + SetupDrawTextures(stage, shader); + SetupDrawImages(stage, shader); + // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the @@ -731,6 +751,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { current_cbuf = 0; auto kernel = shader_cache.GetComputeKernel(code_addr); + program_manager.BindCompute(kernel->GetHandle()); + SetupComputeTextures(kernel); SetupComputeImages(kernel); @@ -745,7 +767,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -1005,40 +1026,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { + static constexpr std::array TARGET_LUT = { + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, + }; + auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + const auto& entries{shader->GetEntries().global_memory_entries}; + + std::array<GLuint64EXT, 32> pointers; + ASSERT(entries.size() < pointers.size()); - u32 binding = - device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; - for (const auto& entry : shader->GetEntries().global_memory_entries) { + const bool assembly_shaders = device.UseAssemblyShaders(); + u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; + for (const auto& entry : entries) { const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; const u32 size{memory_manager.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding++, entry, gpu_addr, size); + SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + ++binding; + } + if (assembly_shaders) { + UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); } } void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + const auto& entries{kernel->GetEntries().global_memory_entries}; + + std::array<GLuint64EXT, 32> pointers; + ASSERT(entries.size() < pointers.size()); u32 binding = 0; - for (const auto& entry : kernel->GetEntries().global_memory_entries) { - const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; - const auto gpu_addr{memory_manager.Read<u64>(addr)}; - const auto size{memory_manager.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding++, entry, gpu_addr, size); + for (const auto& entry : entries) { + const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; + const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; + const u32 size{memory_manager.Read<u32>(addr + 8)}; + SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + ++binding; + } + if (device.UseAssemblyShaders()) { + UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); } } void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, - GPUVAddr gpu_addr, std::size_t size) { - const auto alignment{device.GetShaderStorageBufferAlignment()}; + GPUVAddr gpu_addr, std::size_t size, + GLuint64EXT* pointer) { + const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, - static_cast<GLsizeiptr>(size)); + if (device.UseAssemblyShaders()) { + *pointer = info.address + info.offset; + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, + static_cast<GLsizeiptr>(size)); + } } void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4f082592f..ccc6f50f6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -33,6 +33,7 @@ #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core { @@ -91,6 +92,14 @@ public: return num_queued_commands > 0; } + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -115,9 +124,9 @@ private: /// Configures the current global memory entries to use for the kernel invocation. void SetupComputeGlobalMemory(Shader* kernel); - /// Configures a constant buffer. + /// Configures a global memory buffer. void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, - std::size_t size); + std::size_t size, GLuint64EXT* pointer); /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, Shader* shader); @@ -242,6 +251,7 @@ private: ScreenInfo& screen_info; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCommon::Shader::AsyncShaders async_shaders; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f8b322227..b05cb641c 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -177,6 +177,12 @@ public: Release(); } + OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c6a3bf3a1..f469ed656 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -31,6 +31,7 @@ #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace OpenGL { @@ -140,9 +141,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { return registry; } +std::unordered_set<GLenum> GetSupportedFormats() { + GLint num_formats; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + + std::vector<GLint> formats(num_formats); + glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + + std::unordered_set<GLenum> supported_formats; + for (const GLint format : formats) { + supported_formats.insert(static_cast<GLenum>(format)); + } + return supported_formats; +} + +} // Anonymous namespace + ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, - bool hint_retrievable = false) { + const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { const std::string shader_id = MakeShaderID(unique_identifier, shader_type); LOG_INFO(Render_OpenGL, "{}", shader_id); @@ -181,30 +197,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u return program; } -std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector<GLint> formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set<GLenum> supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast<GLenum>(format)); - } - return supported_formats; -} - -} // Anonymous namespace - Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { + ProgramSharedPtr program_, bool is_built) + : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, + is_built(is_built) { handle = program->assembly_program.handle; if (handle == 0) { handle = program->source_program.handle; } - ASSERT(handle != 0); + if (is_built) { + ASSERT(handle != 0); + } } Shader::~Shader() = default; @@ -214,42 +217,82 @@ GLuint Shader::GetHandle() const { return handle; } -std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode code, ProgramCode code_b) { +bool Shader::IsBuilt() const { + return is_built; +} + +void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { + program->source_program = std::move(new_program); + handle = program->source_program.handle; + is_built = true; +} + +void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { + program->assembly_program = std::move(new_program); + handle = program->assembly_program.handle; + is_built = true; +} + +std::unique_ptr<Shader> Shader::CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, + ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { const auto shader_type = GetShaderType(program_type); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + auto& gpu = params.system.GPU(); + gpu.ShaderNotify().MarkSharderBuilding(); + + auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); + if (!async_shaders.IsShaderAsync(params.system.GPU()) || + !params.device.UseAsynchronousShaders()) { + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (!code_b.empty()) { + // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); + // } + auto program = + BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + ShaderDiskCacheEntry entry; + entry.type = shader_type; + entry.code = std::move(code); + entry.code_b = std::move(code_b); + entry.unique_identifier = params.unique_identifier; + entry.bound_buffer = registry->GetBoundBuffer(); + entry.graphics_info = registry->GetGraphicsInfo(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + gpu.ShaderNotify().MarkShaderComplete(); + + return std::unique_ptr<Shader>(new Shader(std::move(registry), + MakeEntries(params.device, ir, shader_type), + std::move(program), true)); + } else { + // Required for entries + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + auto entries = MakeEntries(params.device, ir, shader_type); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); + async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, + std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, + COMPILER_SETTINGS, *registry, cpu_addr); - return std::unique_ptr<Shader>(new Shader( - std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); + auto program = std::make_shared<ProgramHandle>(); + return std::unique_ptr<Shader>( + new Shader(std::move(registry), std::move(entries), std::move(program), false)); + } } std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto& engine = params.system.GPU().KeplerCompute(); + auto& gpu = params.system.GPU(); + gpu.ShaderNotify().MarkSharderBuilding(); + + auto& engine = gpu.KeplerCompute(); auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const u64 uid = params.unique_identifier; @@ -266,6 +309,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); + gpu.ShaderNotify().MarkShaderComplete(); + return std::unique_ptr<Shader>(new Shader(std::move(registry), MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); @@ -436,14 +481,51 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( return program; } -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { +Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders) { if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { - return last_shaders[static_cast<std::size_t>(program)]; + auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; + if (last_shader->IsBuilt()) { + return last_shader; + } } auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr address{GetShaderAddress(system, program)}; + if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { + auto completed_work = async_shaders.GetCompletedWork(); + for (auto& work : completed_work) { + Shader* shader = TryGet(work.cpu_address); + auto& gpu = system.GPU(); + gpu.ShaderNotify().MarkShaderComplete(); + if (shader == nullptr) { + continue; + } + using namespace VideoCommon::Shader; + if (work.backend == AsyncShaders::Backend::OpenGL) { + shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + shader->AsyncGLASMBuilt(std::move(work.program.glasm)); + } + + ShaderDiskCacheEntry entry; + entry.type = work.shader_type; + entry.code = std::move(work.code); + entry.code_b = std::move(work.code_b); + entry.unique_identifier = work.uid; + + auto& registry = shader->GetRegistry(); + + entry.bound_buffer = registry.GetBoundBuffer(); + entry.graphics_info = registry.GetGraphicsInfo(); + entry.keys = registry.GetKeys(); + entry.bound_samplers = registry.GetBoundSamplers(); + entry.bindless_samplers = registry.GetBindlessSamplers(); + disk_cache.SaveEntry(std::move(entry)); + } + } + // Look up shader in the cache based on address const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { @@ -471,7 +553,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { std::unique_ptr<Shader> shader; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); + shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), + async_shaders, cpu_addr.value_or(0)); } else { shader = Shader::CreateFromCache(params, found->second); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 994aaeaf2..7528ac686 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -33,6 +33,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace VideoCommon::Shader { +class AsyncShaders; +} + namespace OpenGL { class Device; @@ -61,6 +65,11 @@ struct ShaderParameters { u64 unique_identifier; }; +ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, + u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, + bool hint_retrievable = false); + class Shader final { public: ~Shader(); @@ -68,15 +77,28 @@ public: /// Gets the GL program handle for the shader GLuint GetHandle() const; + bool IsBuilt() const; + /// Gets the shader entries for the shader const ShaderEntries& GetEntries() const { return entries; } - static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode program_code, - ProgramCode program_code_b); + const VideoCommon::Shader::Registry& GetRegistry() const { + return *registry; + } + + /// Mark a OpenGL shader as built + void AsyncOpenGLBuilt(OGLProgram new_program); + + /// Mark a GLASM shader as built + void AsyncGLASMBuilt(OGLAssemblyProgram new_program); + + static std::unique_ptr<Shader> CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, + ProgramCode program_code, ProgramCode program_code_b, + VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); + static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); @@ -85,12 +107,13 @@ public: private: explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program); + ProgramSharedPtr program, bool is_built = true); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; ProgramSharedPtr program; GLuint handle = 0; + bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { @@ -104,7 +127,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program); + Shader* GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders); /// Gets a compute kernel in the passed address Shader* GetComputeKernel(GPUVAddr code_addr); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8e754fa90..691c6c79b 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -11,8 +11,30 @@ namespace OpenGL { -ProgramManager::ProgramManager(const Device& device) { - use_assembly_programs = device.UseAssemblyShaders(); +namespace { + +void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { + if (current == old) { + return; + } + if (current == 0) { + if (enabled) { + enabled = false; + glDisable(stage); + } + return; + } + if (!enabled) { + enabled = true; + glEnable(stage); + } + glBindProgramARB(stage, current); +} + +} // Anonymous namespace + +ProgramManager::ProgramManager(const Device& device) + : use_assembly_programs{device.UseAssemblyShaders()} { if (use_assembly_programs) { glEnable(GL_COMPUTE_PROGRAM_NV); } else { @@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) { } void ProgramManager::BindGraphicsPipeline() { - if (use_assembly_programs) { - UpdateAssemblyPrograms(); - } else { + if (!use_assembly_programs) { UpdateSourcePrograms(); } } @@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() { } } -void ProgramManager::UpdateAssemblyPrograms() { - const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); - }; +void ProgramManager::UseVertexShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); + } + current_state.vertex = program; +} - update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, - old_state.geometry); - update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, - old_state.fragment); +void ProgramManager::UseGeometryShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); + } + current_state.geometry = program; +} - old_state = current_state; +void ProgramManager::UseFragmentShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); + } + current_state.fragment = program; } void ProgramManager::UpdateSourcePrograms() { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 0f03b4f12..950e0dfcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,17 +45,9 @@ public: /// Rewinds BindHostPipeline state changes. void RestoreGuestPipeline(); - void UseVertexShader(GLuint program) { - current_state.vertex = program; - } - - void UseGeometryShader(GLuint program) { - current_state.geometry = program; - } - - void UseFragmentShader(GLuint program) { - current_state.fragment = program; - } + void UseVertexShader(GLuint program); + void UseGeometryShader(GLuint program); + void UseFragmentShader(GLuint program); private: struct PipelineState { @@ -64,9 +56,6 @@ private: GLuint fragment = 0; }; - /// Update NV_gpu_program5 programs. - void UpdateAssemblyPrograms(); - /// Update GLSL programs. void UpdateSourcePrograms(); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 3655ff629..887995cf4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver mapped_ptr = static_cast<u8*>( glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); - if (device.HasVertexBufferUnifiedMemory()) { + if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index be175a829..ce53e5a6b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -141,24 +141,28 @@ struct ScreenRectVertex { std::array<f32, 2> tex_coord; static VkVertexInputBindingDescription GetDescription() { - VkVertexInputBindingDescription description; - description.binding = 0; - description.stride = sizeof(ScreenRectVertex); - description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; - return description; + return { + .binding = 0, + .stride = sizeof(ScreenRectVertex), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; } static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() { - std::array<VkVertexInputAttributeDescription, 2> attributes; - attributes[0].location = 0; - attributes[0].binding = 0; - attributes[0].format = VK_FORMAT_R32G32_SFLOAT; - attributes[0].offset = offsetof(ScreenRectVertex, position); - attributes[1].location = 1; - attributes[1].binding = 0; - attributes[1].format = VK_FORMAT_R32G32_SFLOAT; - attributes[1].offset = offsetof(ScreenRectVertex, tex_coord); - return attributes; + return {{ + { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(ScreenRectVertex, position), + }, + { + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(ScreenRectVertex, tex_coord), + }, + }}; } }; @@ -267,20 +271,25 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkBufferImageCopy copy; - copy.bufferOffset = image_offset; - copy.bufferRowLength = 0; - copy.bufferImageHeight = 0; - copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - copy.imageSubresource.mipLevel = 0; - copy.imageSubresource.baseArrayLayer = 0; - copy.imageSubresource.layerCount = 1; - copy.imageOffset.x = 0; - copy.imageOffset.y = 0; - copy.imageOffset.z = 0; - copy.imageExtent.width = framebuffer.width; - copy.imageExtent.height = framebuffer.height; - copy.imageExtent.depth = 1; + const VkBufferImageCopy copy{ + .bufferOffset = image_offset, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + }; scheduler.Record( [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); @@ -295,11 +304,9 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon descriptor_set = descriptor_sets[image_index], buffer = *buffer, size = swapchain.GetSize(), pipeline = *pipeline, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { - VkClearValue clear_color; - clear_color.color.float32[0] = 0.0f; - clear_color.color.float32[1] = 0.0f; - clear_color.color.float32[2] = 0.0f; - clear_color.color.float32[3] = 0.0f; + const VkClearValue clear_color{ + .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, + }; VkRenderPassBeginInfo renderpass_bi; renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; @@ -379,93 +386,109 @@ void VKBlitScreen::CreateSemaphores() { } void VKBlitScreen::CreateDescriptorPool() { - std::array<VkDescriptorPoolSize, 2> pool_sizes; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - pool_sizes[0].descriptorCount = static_cast<u32>(image_count); - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_sizes[1].descriptorCount = static_cast<u32>(image_count); - - VkDescriptorPoolCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - ci.maxSets = static_cast<u32>(image_count); - ci.poolSizeCount = static_cast<u32>(pool_sizes.size()); - ci.pPoolSizes = pool_sizes.data(); + const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ + { + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = static_cast<u32>(image_count), + }, + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = static_cast<u32>(image_count), + }, + }}; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = static_cast<u32>(image_count), + .poolSizeCount = static_cast<u32>(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), + }; descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); } void VKBlitScreen::CreateRenderPass() { - VkAttachmentDescription color_attachment; - color_attachment.flags = 0; - color_attachment.format = swapchain.GetImageFormat(); - color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; - color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - - VkAttachmentReference color_attachment_ref; - color_attachment_ref.attachment = 0; - color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = 1; - subpass_description.pColorAttachments = &color_attachment_ref; - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; - - VkSubpassDependency dependency; - dependency.srcSubpass = VK_SUBPASS_EXTERNAL; - dependency.dstSubpass = 0; - dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dependency.srcAccessMask = 0; - dependency.dstAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - dependency.dependencyFlags = 0; - - VkRenderPassCreateInfo renderpass_ci; - renderpass_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - renderpass_ci.pNext = nullptr; - renderpass_ci.flags = 0; - renderpass_ci.attachmentCount = 1; - renderpass_ci.pAttachments = &color_attachment; - renderpass_ci.subpassCount = 1; - renderpass_ci.pSubpasses = &subpass_description; - renderpass_ci.dependencyCount = 1; - renderpass_ci.pDependencies = &dependency; + const VkAttachmentDescription color_attachment{ + .flags = 0, + .format = swapchain.GetImageFormat(), + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + }; + + const VkAttachmentReference color_attachment_ref{ + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + + const VkSubpassDescription subpass_description{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = 1, + .pColorAttachments = &color_attachment_ref, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + + const VkSubpassDependency dependency{ + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dependencyFlags = 0, + }; + + const VkRenderPassCreateInfo renderpass_ci{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = 1, + .pAttachments = &color_attachment, + .subpassCount = 1, + .pSubpasses = &subpass_description, + .dependencyCount = 1, + .pDependencies = &dependency, + }; renderpass = device.GetLogical().CreateRenderPass(renderpass_ci); } void VKBlitScreen::CreateDescriptorSetLayout() { - std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings; - layout_bindings[0].binding = 0; - layout_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - layout_bindings[0].descriptorCount = 1; - layout_bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - layout_bindings[0].pImmutableSamplers = nullptr; - layout_bindings[1].binding = 1; - layout_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - layout_bindings[1].descriptorCount = 1; - layout_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - layout_bindings[1].pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = static_cast<u32>(layout_bindings.size()); - ci.pBindings = layout_bindings.data(); + const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = nullptr, + }, + }}; + + const VkDescriptorSetLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(layout_bindings.size()), + .pBindings = layout_bindings.data(), + }; descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); } @@ -473,175 +496,192 @@ void VKBlitScreen::CreateDescriptorSetLayout() { void VKBlitScreen::CreateDescriptorSets() { const std::vector layouts(image_count, *descriptor_set_layout); - VkDescriptorSetAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.descriptorPool = *descriptor_pool; - ai.descriptorSetCount = static_cast<u32>(image_count); - ai.pSetLayouts = layouts.data(); + const VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *descriptor_pool, + .descriptorSetCount = static_cast<u32>(image_count), + .pSetLayouts = layouts.data(), + }; + descriptor_sets = descriptor_pool.Allocate(ai); } void VKBlitScreen::CreatePipelineLayout() { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; + const VkPipelineLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); } void VKBlitScreen::CreateGraphicsPipeline() { - std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages; - shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - shader_stages[0].pNext = nullptr; - shader_stages[0].flags = 0; - shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - shader_stages[0].module = *vertex_shader; - shader_stages[0].pName = "main"; - shader_stages[0].pSpecializationInfo = nullptr; - shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - shader_stages[1].pNext = nullptr; - shader_stages[1].flags = 0; - shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - shader_stages[1].module = *fragment_shader; - shader_stages[1].pName = "main"; - shader_stages[1].pSpecializationInfo = nullptr; + const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; const auto vertex_binding_description = ScreenRectVertex::GetDescription(); const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - VkPipelineVertexInputStateCreateInfo vertex_input_ci; - vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_input_ci.pNext = nullptr; - vertex_input_ci.flags = 0; - vertex_input_ci.vertexBindingDescriptionCount = 1; - vertex_input_ci.pVertexBindingDescriptions = &vertex_binding_description; - vertex_input_ci.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}; - vertex_input_ci.pVertexAttributeDescriptions = vertex_attrs_description.data(); - - VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; - input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_assembly_ci.pNext = nullptr; - input_assembly_ci.flags = 0; - input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; - input_assembly_ci.primitiveRestartEnable = VK_FALSE; - - VkPipelineViewportStateCreateInfo viewport_state_ci; - viewport_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_state_ci.pNext = nullptr; - viewport_state_ci.flags = 0; - viewport_state_ci.viewportCount = 1; - viewport_state_ci.pViewports = nullptr; - viewport_state_ci.scissorCount = 1; - viewport_state_ci.pScissors = nullptr; - - VkPipelineRasterizationStateCreateInfo rasterization_ci; - rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_ci.pNext = nullptr; - rasterization_ci.flags = 0; - rasterization_ci.depthClampEnable = VK_FALSE; - rasterization_ci.rasterizerDiscardEnable = VK_FALSE; - rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_ci.cullMode = VK_CULL_MODE_NONE; - rasterization_ci.frontFace = VK_FRONT_FACE_CLOCKWISE; - rasterization_ci.depthBiasEnable = VK_FALSE; - rasterization_ci.depthBiasConstantFactor = 0.0f; - rasterization_ci.depthBiasClamp = 0.0f; - rasterization_ci.depthBiasSlopeFactor = 0.0f; - rasterization_ci.lineWidth = 1.0f; - - VkPipelineMultisampleStateCreateInfo multisampling_ci; - multisampling_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisampling_ci.pNext = nullptr; - multisampling_ci.flags = 0; - multisampling_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisampling_ci.sampleShadingEnable = VK_FALSE; - multisampling_ci.minSampleShading = 0.0f; - multisampling_ci.pSampleMask = nullptr; - multisampling_ci.alphaToCoverageEnable = VK_FALSE; - multisampling_ci.alphaToOneEnable = VK_FALSE; - - VkPipelineColorBlendAttachmentState color_blend_attachment; - color_blend_attachment.blendEnable = VK_FALSE; - color_blend_attachment.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO; - color_blend_attachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; - color_blend_attachment.colorBlendOp = VK_BLEND_OP_ADD; - color_blend_attachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; - color_blend_attachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; - color_blend_attachment.alphaBlendOp = VK_BLEND_OP_ADD; - color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; - - VkPipelineColorBlendStateCreateInfo color_blend_ci; - color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - color_blend_ci.flags = 0; - color_blend_ci.pNext = nullptr; - color_blend_ci.logicOpEnable = VK_FALSE; - color_blend_ci.logicOp = VK_LOGIC_OP_COPY; - color_blend_ci.attachmentCount = 1; - color_blend_ci.pAttachments = &color_blend_attachment; - color_blend_ci.blendConstants[0] = 0.0f; - color_blend_ci.blendConstants[1] = 0.0f; - color_blend_ci.blendConstants[2] = 0.0f; - color_blend_ci.blendConstants[3] = 0.0f; - - static constexpr std::array dynamic_states = {VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR}; - VkPipelineDynamicStateCreateInfo dynamic_state_ci; - dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_ci.pNext = nullptr; - dynamic_state_ci.flags = 0; - dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); - dynamic_state_ci.pDynamicStates = dynamic_states.data(); - - VkGraphicsPipelineCreateInfo pipeline_ci; - pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - pipeline_ci.pNext = nullptr; - pipeline_ci.flags = 0; - pipeline_ci.stageCount = static_cast<u32>(shader_stages.size()); - pipeline_ci.pStages = shader_stages.data(); - pipeline_ci.pVertexInputState = &vertex_input_ci; - pipeline_ci.pInputAssemblyState = &input_assembly_ci; - pipeline_ci.pTessellationState = nullptr; - pipeline_ci.pViewportState = &viewport_state_ci; - pipeline_ci.pRasterizationState = &rasterization_ci; - pipeline_ci.pMultisampleState = &multisampling_ci; - pipeline_ci.pDepthStencilState = nullptr; - pipeline_ci.pColorBlendState = &color_blend_ci; - pipeline_ci.pDynamicState = &dynamic_state_ci; - pipeline_ci.layout = *pipeline_layout; - pipeline_ci.renderPass = *renderpass; - pipeline_ci.subpass = 0; - pipeline_ci.basePipelineHandle = 0; - pipeline_ci.basePipelineIndex = 0; + const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &vertex_binding_description, + .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, + .pVertexAttributeDescriptions = vertex_attrs_description.data(), + }; + + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = VK_FALSE, + }; + + const VkPipelineViewportStateCreateInfo viewport_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, + }; + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + + const VkPipelineMultisampleStateCreateInfo multisampling_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + + const VkPipelineColorBlendAttachmentState color_blend_attachment{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + + static constexpr std::array dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast<u32>(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const VkGraphicsPipelineCreateInfo pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); } void VKBlitScreen::CreateSampler() { - VkSamplerCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.magFilter = VK_FILTER_LINEAR; - ci.minFilter = VK_FILTER_NEAREST; - ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - ci.mipLodBias = 0.0f; - ci.anisotropyEnable = VK_FALSE; - ci.maxAnisotropy = 0.0f; - ci.compareEnable = VK_FALSE; - ci.compareOp = VK_COMPARE_OP_NEVER; - ci.minLod = 0.0f; - ci.maxLod = 0.0f; - ci.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - ci.unnormalizedCoordinates = VK_FALSE; + const VkSamplerCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; sampler = device.GetLogical().CreateSampler(ci); } @@ -650,15 +690,16 @@ void VKBlitScreen::CreateFramebuffers() { const VkExtent2D size{swapchain.GetSize()}; framebuffers.resize(image_count); - VkFramebufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.renderPass = *renderpass; - ci.attachmentCount = 1; - ci.width = size.width; - ci.height = size.height; - ci.layers = 1; + VkFramebufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = *renderpass, + .attachmentCount = 1, + .width = size.width, + .height = size.height, + .layers = 1, + }; for (std::size_t i = 0; i < image_count; ++i) { const VkImageView image_view{swapchain.GetImageViewIndex(i)}; @@ -678,16 +719,17 @@ void VKBlitScreen::ReleaseRawImages() { } void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = CalculateBufferSize(framebuffer); - ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = CalculateBufferSize(framebuffer), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; buffer = device.GetLogical().CreateBuffer(ci); buffer_commit = memory_manager.Commit(buffer, true); @@ -697,24 +739,28 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) raw_images.resize(image_count); raw_buffer_commits.resize(image_count); - VkImageCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.imageType = VK_IMAGE_TYPE_2D; - ci.format = GetFormat(framebuffer); - ci.extent.width = framebuffer.width; - ci.extent.height = framebuffer.height; - ci.extent.depth = 1; - ci.mipLevels = 1; - ci.arrayLayers = 1; - ci.samples = VK_SAMPLE_COUNT_1_BIT; - ci.tiling = VK_IMAGE_TILING_LINEAR; - ci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + const VkImageCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = VK_IMAGE_TYPE_2D, + .format = GetFormat(framebuffer), + .extent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; for (std::size_t i = 0; i < image_count; ++i) { raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); @@ -723,39 +769,43 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) } void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { - VkDescriptorBufferInfo buffer_info; - buffer_info.buffer = *buffer; - buffer_info.offset = offsetof(BufferData, uniform); - buffer_info.range = sizeof(BufferData::uniform); - - VkWriteDescriptorSet ubo_write; - ubo_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - ubo_write.pNext = nullptr; - ubo_write.dstSet = descriptor_sets[image_index]; - ubo_write.dstBinding = 0; - ubo_write.dstArrayElement = 0; - ubo_write.descriptorCount = 1; - ubo_write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - ubo_write.pImageInfo = nullptr; - ubo_write.pBufferInfo = &buffer_info; - ubo_write.pTexelBufferView = nullptr; - - VkDescriptorImageInfo image_info; - image_info.sampler = *sampler; - image_info.imageView = image_view; - image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - - VkWriteDescriptorSet sampler_write; - sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - sampler_write.pNext = nullptr; - sampler_write.dstSet = descriptor_sets[image_index]; - sampler_write.dstBinding = 1; - sampler_write.dstArrayElement = 0; - sampler_write.descriptorCount = 1; - sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - sampler_write.pImageInfo = &image_info; - sampler_write.pBufferInfo = nullptr; - sampler_write.pTexelBufferView = nullptr; + const VkDescriptorBufferInfo buffer_info{ + .buffer = *buffer, + .offset = offsetof(BufferData, uniform), + .range = sizeof(BufferData::uniform), + }; + + const VkWriteDescriptorSet ubo_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .pImageInfo = nullptr, + .pBufferInfo = &buffer_info, + .pTexelBufferView = nullptr, + }; + + const VkDescriptorImageInfo image_info{ + .sampler = *sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + }; + + const VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2be38d419..1d2f8b557 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -39,16 +39,17 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(size); - ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; + : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(size), + .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; buffer.handle = device.GetLogical().CreateBuffer(ci); buffer.commit = memory_manager.Commit(buffer.handle, false); @@ -66,16 +67,17 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, barrier, {}); }); @@ -87,16 +89,17 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { const VkBuffer handle = Handle(); scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index da71e710c..182461ed9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -115,32 +115,32 @@ constexpr u8 quad_array[] = { 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { - VkDescriptorSetLayoutBinding binding; - binding.binding = 0; - binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = nullptr; - return binding; + return { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }; } VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - VkPushConstantRange range; - range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - range.offset = 0; - range.size = static_cast<u32>(size); - return range; + return { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast<u32>(size), + }; } // Uint8 SPIR-V module. Generated from the "shaders/" directory. @@ -344,29 +344,33 @@ constexpr u8 QUAD_INDEXED_SPV[] = { 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { - std::array<VkDescriptorSetLayoutBinding, 2> bindings; - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[0].pImmutableSamplers = nullptr; - bindings[1].binding = 1; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[1].pImmutableSamplers = nullptr; - return bindings; + return {{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + }}; } VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 2; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } } // Anonymous namespace @@ -376,37 +380,37 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, const u8* code) { - VkDescriptorSetLayoutCreateInfo descriptor_layout_ci; - descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_layout_ci.pNext = nullptr; - descriptor_layout_ci.flags = 0; - descriptor_layout_ci.bindingCount = bindings.size(); - descriptor_layout_ci.pBindings = bindings.data(); - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci); - - VkPipelineLayoutCreateInfo pipeline_layout_ci; - pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_ci.pNext = nullptr; - pipeline_layout_ci.flags = 0; - pipeline_layout_ci.setLayoutCount = 1; - pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address(); - pipeline_layout_ci.pushConstantRangeCount = push_constants.size(); - pipeline_layout_ci.pPushConstantRanges = push_constants.data(); - layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci); + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = bindings.size(), + .pBindings = bindings.data(), + }); + + layout = device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), + }); if (!templates.empty()) { - VkDescriptorUpdateTemplateCreateInfoKHR template_ci; - template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - template_ci.pNext = nullptr; - template_ci.flags = 0; - template_ci.descriptorUpdateEntryCount = templates.size(); - template_ci.pDescriptorUpdateEntries = templates.data(); - template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - template_ci.descriptorSetLayout = *descriptor_set_layout; - template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - template_ci.pipelineLayout = *layout; - template_ci.set = 0; - descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci); + descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = templates.size(), + .pDescriptorUpdateEntries = templates.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = 0, + }); descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } @@ -414,32 +418,32 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); std::memcpy(code_copy.get(), code, code_size); - VkShaderModuleCreateInfo module_ci; - module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - module_ci.pNext = nullptr; - module_ci.flags = 0; - module_ci.codeSize = code_size; - module_ci.pCode = code_copy.get(); - module = device.GetLogical().CreateShaderModule(module_ci); - - VkComputePipelineCreateInfo pipeline_ci; - pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline_ci.pNext = nullptr; - pipeline_ci.flags = 0; - pipeline_ci.layout = *layout; - pipeline_ci.basePipelineHandle = nullptr; - pipeline_ci.basePipelineIndex = 0; - - VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); + module = device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = code_copy.get(), + }); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 281bf9ac3..ed9d2991c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -43,12 +43,13 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { // TODO(Rodrigo): Maybe make individual bindings here? for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = 1; - entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); } }; add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); @@ -58,25 +59,25 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = static_cast<u32>(bindings.size()); - ci.pBindings = bindings.data(); - return device.GetLogical().CreateDescriptorSetLayout(ci); + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); } vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; - return device.GetLogical().CreatePipelineLayout(ci); + return device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); } vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { @@ -89,59 +90,63 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat return {}; } - VkDescriptorUpdateTemplateCreateInfoKHR ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - ci.pNext = nullptr; - ci.flags = 0; - ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); - ci.pDescriptorUpdateEntries = template_entries.data(); - ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - ci.descriptorSetLayout = *descriptor_set_layout; - ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - ci.pipelineLayout = *layout; - ci.set = DESCRIPTOR_SET; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), + .pDescriptorUpdateEntries = template_entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = DESCRIPTOR_SET, + }); } vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { device.SaveShader(code); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code.size() * sizeof(u32); - ci.pCode = code.data(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code.size() * sizeof(u32), + .pCode = code.data(), + }); } vk::Pipeline VKComputePipeline::CreatePipeline() const { - VkComputePipelineCreateInfo ci; - VkPipelineShaderStageCreateInfo& stage_ci = ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *shader_module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; - subgroup_size_ci.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - subgroup_size_ci.pNext = nullptr; - subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + + VkComputePipelineCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *shader_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }; + + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - stage_ci.pNext = &subgroup_size_ci; + ci.stage.pNext = &subgroup_size_ci; } - ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.layout = *layout; - ci.basePipelineHandle = nullptr; - ci.basePipelineIndex = 0; return device.GetLogical().CreateComputePipeline(ci); } diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 9259b618d..ac4a0884e 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -43,27 +43,30 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; - - VkDescriptorPoolCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - ci.maxSets = num_sets; - ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes)); - ci.pPoolSizes = std::data(pool_sizes); + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, + }; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = num_sets, + .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), + .pPoolSizes = std::data(pool_sizes), + }; return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); } vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count) { const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.descriptorPool = **active_pool; - ai.descriptorSetCount = static_cast<u32>(count); - ai.pSetLayouts = layout_copies.data(); + VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = **active_pool, + .descriptorSetCount = static_cast<u32>(count), + .pSetLayouts = layout_copies.data(), + }; vk::DescriptorSets sets = active_pool->Allocate(ai); if (!sets.IsOutOfPoolMemory()) { diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 71f0ff427..6245e0d78 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -22,14 +22,21 @@ namespace { namespace Alternatives { -constexpr std::array Depth24UnormS8_UINT = {VK_FORMAT_D32_SFLOAT_S8_UINT, - VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}}; -constexpr std::array Depth16UnormS8_UINT = {VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_D32_SFLOAT_S8_UINT, VkFormat{}}; +constexpr std::array Depth24UnormS8_UINT{ + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_D16_UNORM_S8_UINT, + VkFormat{}, +}; + +constexpr std::array Depth16UnormS8_UINT{ + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VkFormat{}, +}; } // namespace Alternatives -constexpr std::array REQUIRED_EXTENSIONS = { +constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_16BIT_STORAGE_EXTENSION_NAME, VK_KHR_8BIT_STORAGE_EXTENSION_NAME, @@ -178,97 +185,104 @@ bool VKDevice::Create() { const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(); - VkPhysicalDeviceFeatures2 features2; - features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - features2.pNext = nullptr; + VkPhysicalDeviceFeatures2 features2{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = nullptr, + }; const void* first_next = &features2; void** next = &features2.pNext; - auto& features = features2.features; - features.robustBufferAccess = false; - features.fullDrawIndexUint32 = false; - features.imageCubeArray = false; - features.independentBlend = true; - features.geometryShader = true; - features.tessellationShader = true; - features.sampleRateShading = false; - features.dualSrcBlend = false; - features.logicOp = false; - features.multiDrawIndirect = false; - features.drawIndirectFirstInstance = false; - features.depthClamp = true; - features.depthBiasClamp = true; - features.fillModeNonSolid = false; - features.depthBounds = false; - features.wideLines = false; - features.largePoints = true; - features.alphaToOne = false; - features.multiViewport = true; - features.samplerAnisotropy = true; - features.textureCompressionETC2 = false; - features.textureCompressionASTC_LDR = is_optimal_astc_supported; - features.textureCompressionBC = false; - features.occlusionQueryPrecise = true; - features.pipelineStatisticsQuery = false; - features.vertexPipelineStoresAndAtomics = true; - features.fragmentStoresAndAtomics = true; - features.shaderTessellationAndGeometryPointSize = false; - features.shaderImageGatherExtended = true; - features.shaderStorageImageExtendedFormats = false; - features.shaderStorageImageMultisample = false; - features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; - features.shaderStorageImageWriteWithoutFormat = true; - features.shaderUniformBufferArrayDynamicIndexing = false; - features.shaderSampledImageArrayDynamicIndexing = false; - features.shaderStorageBufferArrayDynamicIndexing = false; - features.shaderStorageImageArrayDynamicIndexing = false; - features.shaderClipDistance = false; - features.shaderCullDistance = false; - features.shaderFloat64 = false; - features.shaderInt64 = false; - features.shaderInt16 = false; - features.shaderResourceResidency = false; - features.shaderResourceMinLod = false; - features.sparseBinding = false; - features.sparseResidencyBuffer = false; - features.sparseResidencyImage2D = false; - features.sparseResidencyImage3D = false; - features.sparseResidency2Samples = false; - features.sparseResidency4Samples = false; - features.sparseResidency8Samples = false; - features.sparseResidency16Samples = false; - features.sparseResidencyAliased = false; - features.variableMultisampleRate = false; - features.inheritedQueries = false; - - VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage; - bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; - bit16_storage.pNext = nullptr; - bit16_storage.storageBuffer16BitAccess = false; - bit16_storage.uniformAndStorageBuffer16BitAccess = true; - bit16_storage.storagePushConstant16 = false; - bit16_storage.storageInputOutput16 = false; + features2.features = { + .robustBufferAccess = false, + .fullDrawIndexUint32 = false, + .imageCubeArray = false, + .independentBlend = true, + .geometryShader = true, + .tessellationShader = true, + .sampleRateShading = false, + .dualSrcBlend = false, + .logicOp = false, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = true, + .depthBiasClamp = true, + .fillModeNonSolid = false, + .depthBounds = false, + .wideLines = false, + .largePoints = true, + .alphaToOne = false, + .multiViewport = true, + .samplerAnisotropy = true, + .textureCompressionETC2 = false, + .textureCompressionASTC_LDR = is_optimal_astc_supported, + .textureCompressionBC = false, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = false, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = false, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, + .shaderStorageImageWriteWithoutFormat = true, + .shaderUniformBufferArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .shaderResourceResidency = false, + .shaderResourceMinLod = false, + .sparseBinding = false, + .sparseResidencyBuffer = false, + .sparseResidencyImage2D = false, + .sparseResidencyImage3D = false, + .sparseResidency2Samples = false, + .sparseResidency4Samples = false, + .sparseResidency8Samples = false, + .sparseResidency16Samples = false, + .sparseResidencyAliased = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; + + VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, + .pNext = nullptr, + .storageBuffer16BitAccess = false, + .uniformAndStorageBuffer16BitAccess = true, + .storagePushConstant16 = false, + .storageInputOutput16 = false, + }; SetNext(next, bit16_storage); - VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage; - bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR; - bit8_storage.pNext = nullptr; - bit8_storage.storageBuffer8BitAccess = false; - bit8_storage.uniformAndStorageBuffer8BitAccess = true; - bit8_storage.storagePushConstant8 = false; + VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR, + .pNext = nullptr, + .storageBuffer8BitAccess = false, + .uniformAndStorageBuffer8BitAccess = true, + .storagePushConstant8 = false, + }; SetNext(next, bit8_storage); - VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; - host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT; - host_query_reset.hostQueryReset = true; + VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT, + .hostQueryReset = true, + }; SetNext(next, host_query_reset); VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { - float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; - float16_int8.pNext = nullptr; - float16_int8.shaderFloat16 = true; - float16_int8.shaderInt8 = false; + float16_int8 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR, + .pNext = nullptr, + .shaderFloat16 = true, + .shaderInt8 = false, + }; SetNext(next, float16_int8); } else { LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); @@ -280,10 +294,11 @@ bool VKDevice::Create() { VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { - std430_layout.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR; - std430_layout.pNext = nullptr; - std430_layout.uniformBufferStandardLayout = true; + std430_layout = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .uniformBufferStandardLayout = true, + }; SetNext(next, std430_layout); } else { LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); @@ -291,9 +306,11 @@ bool VKDevice::Create() { VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; if (ext_index_type_uint8) { - index_type_uint8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT; - index_type_uint8.pNext = nullptr; - index_type_uint8.indexTypeUint8 = true; + index_type_uint8 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT, + .pNext = nullptr, + .indexTypeUint8 = true, + }; SetNext(next, index_type_uint8); } else { LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); @@ -301,11 +318,12 @@ bool VKDevice::Create() { VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; if (ext_transform_feedback) { - transform_feedback.sType = - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; - transform_feedback.pNext = nullptr; - transform_feedback.transformFeedback = true; - transform_feedback.geometryStreams = true; + transform_feedback = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, + .pNext = nullptr, + .transformFeedback = true, + .geometryStreams = true, + }; SetNext(next, transform_feedback); } else { LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); @@ -313,10 +331,12 @@ bool VKDevice::Create() { VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border; if (ext_custom_border_color) { - custom_border.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; - custom_border.pNext = nullptr; - custom_border.customBorderColors = VK_TRUE; - custom_border.customBorderColorWithoutFormat = VK_TRUE; + custom_border = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, + .pNext = nullptr, + .customBorderColors = VK_TRUE, + .customBorderColorWithoutFormat = VK_TRUE, + }; SetNext(next, custom_border); } else { LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); @@ -324,9 +344,11 @@ bool VKDevice::Create() { VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; if (ext_extended_dynamic_state) { - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - dynamic_state.extendedDynamicState = VK_TRUE; + dynamic_state = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .extendedDynamicState = VK_TRUE, + }; SetNext(next, dynamic_state); } else { LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); @@ -340,11 +362,13 @@ bool VKDevice::Create() { if (nv_device_diagnostics_config) { nsight_aftermath_tracker.Initialize(); - diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV; - diagnostics_nv.pNext = &features2; - diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV | - VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV | - VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV; + diagnostics_nv = { + .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, + .pNext = &features2, + .flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV | + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV | + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV, + }; first_next = &diagnostics_nv; } @@ -713,13 +737,15 @@ void VKDevice::SetupFeatures() { } void VKDevice::CollectTelemetryParameters() { - VkPhysicalDeviceDriverPropertiesKHR driver; - driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR; - driver.pNext = nullptr; + VkPhysicalDeviceDriverPropertiesKHR driver{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, + .pNext = nullptr, + }; - VkPhysicalDeviceProperties2KHR properties; - properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; - properties.pNext = &driver; + VkPhysicalDeviceProperties2KHR properties{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, + .pNext = &driver, + }; physical.GetProperties2KHR(properties); driver_id = driver.driverID; @@ -728,22 +754,24 @@ void VKDevice::CollectTelemetryParameters() { const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); reported_extensions.reserve(std::size(extensions)); for (const auto& extension : extensions) { - reported_extensions.push_back(extension.extensionName); + reported_extensions.emplace_back(extension.extensionName); } } std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { static constexpr float QUEUE_PRIORITY = 1.0f; - std::unordered_set<u32> unique_queue_families = {graphics_family, present_family}; + std::unordered_set<u32> unique_queue_families{graphics_family, present_family}; std::vector<VkDeviceQueueCreateInfo> queue_cis; + queue_cis.reserve(unique_queue_families.size()); for (const u32 queue_family : unique_queue_families) { - VkDeviceQueueCreateInfo& ci = queue_cis.emplace_back(); - ci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.queueFamilyIndex = queue_family; + auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queueFamilyIndex = queue_family, + }); ci.queueCount = 1; ci.pQueuePriorities = &QUEUE_PRIORITY; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 844445105..aaf930b90 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -28,15 +28,15 @@ namespace { template <class StencilFace> VkStencilOpState GetStencilFaceState(const StencilFace& face) { - VkStencilOpState state; - state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()); - state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()); - state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()); - state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()); - state.compareMask = 0; - state.writeMask = 0; - state.reference = 0; - return state; + return { + .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), + .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), + .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), + .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), + .compareMask = 0, + .writeMask = 0, + .reference = 0, + }; } bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { @@ -52,20 +52,21 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { } VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { - union { + union Swizzle { u32 raw; BitField<0, 3, Maxwell::ViewportSwizzle> x; BitField<4, 3, Maxwell::ViewportSwizzle> y; BitField<8, 3, Maxwell::ViewportSwizzle> z; BitField<12, 3, Maxwell::ViewportSwizzle> w; - } const unpacked{swizzle}; - - VkViewportSwizzleNV result; - result.x = MaxwellToVK::ViewportSwizzle(unpacked.x); - result.y = MaxwellToVK::ViewportSwizzle(unpacked.y); - result.z = MaxwellToVK::ViewportSwizzle(unpacked.z); - result.w = MaxwellToVK::ViewportSwizzle(unpacked.w); - return result; + }; + const Swizzle unpacked{swizzle}; + + return { + .x = MaxwellToVK::ViewportSwizzle(unpacked.x), + .y = MaxwellToVK::ViewportSwizzle(unpacked.y), + .z = MaxwellToVK::ViewportSwizzle(unpacked.z), + .w = MaxwellToVK::ViewportSwizzle(unpacked.w), + }; } } // Anonymous namespace @@ -100,24 +101,26 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( vk::Span<VkDescriptorSetLayoutBinding> bindings) const { - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = bindings.size(); - ci.pBindings = bindings.data(); + const VkDescriptorSetLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = bindings.size(), + .pBindings = bindings.data(), + }; return device.GetLogical().CreateDescriptorSetLayout(ci); } vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; + const VkPipelineLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; return device.GetLogical().CreatePipelineLayout(ci); } @@ -136,26 +139,28 @@ vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTempla return {}; } - VkDescriptorUpdateTemplateCreateInfoKHR ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - ci.pNext = nullptr; - ci.flags = 0; - ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); - ci.pDescriptorUpdateEntries = template_entries.data(); - ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - ci.descriptorSetLayout = *descriptor_set_layout; - ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - ci.pipelineLayout = *layout; - ci.set = DESCRIPTOR_SET; + const VkDescriptorUpdateTemplateCreateInfoKHR ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), + .pDescriptorUpdateEntries = template_entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = DESCRIPTOR_SET, + }; return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); } std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + VkShaderModuleCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; std::vector<vk::ShaderModule> modules; modules.reserve(Maxwell::MaxShaderStage); @@ -204,15 +209,17 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - auto& vertex_binding = vertex_bindings.emplace_back(); - vertex_binding.binding = static_cast<u32>(index); - vertex_binding.stride = binding.stride; - vertex_binding.inputRate = rate; + vertex_bindings.push_back({ + .binding = static_cast<u32>(index), + .stride = binding.stride, + .inputRate = rate, + }); if (instanced) { - auto& binding_divisor = vertex_binding_divisors.emplace_back(); - binding_divisor.binding = static_cast<u32>(index); - binding_divisor.divisor = state.binding_divisors[index]; + vertex_binding_divisors.push_back({ + .binding = static_cast<u32>(index), + .divisor = state.binding_divisors[index], + }); } } @@ -227,116 +234,132 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa // Skip attributes not used by the vertex shaders. continue; } - auto& vertex_attribute = vertex_attributes.emplace_back(); - vertex_attribute.location = static_cast<u32>(index); - vertex_attribute.binding = attribute.buffer; - vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()); - vertex_attribute.offset = attribute.offset; + vertex_attributes.push_back({ + .location = static_cast<u32>(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - VkPipelineVertexInputStateCreateInfo vertex_input_ci; - vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_input_ci.pNext = nullptr; - vertex_input_ci.flags = 0; - vertex_input_ci.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()); - vertex_input_ci.pVertexBindingDescriptions = vertex_bindings.data(); - vertex_input_ci.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()); - vertex_input_ci.pVertexAttributeDescriptions = vertex_attributes.data(); - - VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci; - input_divisor_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; - input_divisor_ci.pNext = nullptr; - input_divisor_ci.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()); - input_divisor_ci.pVertexBindingDivisors = vertex_binding_divisors.data(); + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), + }; + + const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()), + .pVertexBindingDivisors = vertex_binding_divisors.data(), + }; if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; - input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_assembly_ci.pNext = nullptr; - input_assembly_ci.flags = 0; - input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()); - input_assembly_ci.primitiveRestartEnable = - state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); - - VkPipelineTessellationStateCreateInfo tessellation_ci; - tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; - tessellation_ci.pNext = nullptr; - tessellation_ci.flags = 0; - tessellation_ci.patchControlPoints = state.patch_control_points_minus_one.Value() + 1; - - VkPipelineViewportStateCreateInfo viewport_ci; - viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_ci.pNext = nullptr; - viewport_ci.flags = 0; - viewport_ci.viewportCount = Maxwell::NumViewports; - viewport_ci.pViewports = nullptr; - viewport_ci.scissorCount = Maxwell::NumViewports; - viewport_ci.pScissors = nullptr; + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()); + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()), + .primitiveRestartEnable = state.primitive_restart_enable != 0 && + SupportsPrimitiveRestart(input_assembly_topology), + }; + + const VkPipelineTessellationStateCreateInfo tessellation_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + }; + + VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci; - swizzle_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV; - swizzle_ci.pNext = nullptr; - swizzle_ci.flags = 0; - swizzle_ci.viewportCount = Maxwell::NumViewports; - swizzle_ci.pViewportSwizzles = swizzles.data(); + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewportSwizzles = swizzles.data(), + }; if (device.IsNvViewportSwizzleSupported()) { viewport_ci.pNext = &swizzle_ci; } - VkPipelineRasterizationStateCreateInfo rasterization_ci; - rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_ci.pNext = nullptr; - rasterization_ci.flags = 0; - rasterization_ci.depthClampEnable = state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; - rasterization_ci.rasterizerDiscardEnable = state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE; - rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_ci.cullMode = - dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE; - rasterization_ci.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()); - rasterization_ci.depthBiasEnable = state.depth_bias_enable; - rasterization_ci.depthBiasConstantFactor = 0.0f; - rasterization_ci.depthBiasClamp = 0.0f; - rasterization_ci.depthBiasSlopeFactor = 0.0f; - rasterization_ci.lineWidth = 1.0f; - - VkPipelineMultisampleStateCreateInfo multisample_ci; - multisample_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_ci.pNext = nullptr; - multisample_ci.flags = 0; - multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisample_ci.sampleShadingEnable = VK_FALSE; - multisample_ci.minSampleShading = 0.0f; - multisample_ci.pSampleMask = nullptr; - multisample_ci.alphaToCoverageEnable = VK_FALSE; - multisample_ci.alphaToOneEnable = VK_FALSE; - - VkPipelineDepthStencilStateCreateInfo depth_stencil_ci; - depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - depth_stencil_ci.pNext = nullptr; - depth_stencil_ci.flags = 0; - depth_stencil_ci.depthTestEnable = dynamic.depth_test_enable; - depth_stencil_ci.depthWriteEnable = dynamic.depth_write_enable; - depth_stencil_ci.depthCompareOp = dynamic.depth_test_enable - ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) - : VK_COMPARE_OP_ALWAYS; - depth_stencil_ci.depthBoundsTestEnable = dynamic.depth_bounds_enable; - depth_stencil_ci.stencilTestEnable = dynamic.stencil_enable; - depth_stencil_ci.front = GetStencilFaceState(dynamic.front); - depth_stencil_ci.back = GetStencilFaceState(dynamic.back); - depth_stencil_ci.minDepthBounds = 0.0f; - depth_stencil_ci.maxDepthBounds = 0.0f; + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = + static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + .rasterizerDiscardEnable = + static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE, + .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), + .depthBiasEnable = state.depth_bias_enable, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + + const VkPipelineMultisampleStateCreateInfo multisample_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = dynamic.depth_test_enable, + .depthWriteEnable = dynamic.depth_write_enable, + .depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .stencilTestEnable = dynamic.stencil_enable, + .front = GetStencilFaceState(dynamic.front), + .back = GetStencilFaceState(dynamic.back), + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); for (std::size_t index = 0; index < num_attachments; ++index) { - static constexpr std::array COMPONENT_TABLE = { - VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, - VK_COLOR_COMPONENT_A_BIT}; + static constexpr std::array COMPONENT_TABLE{ + VK_COLOR_COMPONENT_R_BIT, + VK_COLOR_COMPONENT_G_BIT, + VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT, + }; const auto& blend = state.attachments[index]; VkColorComponentFlags color_components = 0; @@ -346,35 +369,36 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa } } - VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index]; - attachment.blendEnable = blend.enable != 0; - attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()); - attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()); - attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()); - attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()); - attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()); - attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()); - attachment.colorWriteMask = color_components; + cb_attachments[index] = { + .blendEnable = blend.enable != 0, + .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), + .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), + .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), + .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), + .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), + .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), + .colorWriteMask = color_components, + }; } - VkPipelineColorBlendStateCreateInfo color_blend_ci; - color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - color_blend_ci.pNext = nullptr; - color_blend_ci.flags = 0; - color_blend_ci.logicOpEnable = VK_FALSE; - color_blend_ci.logicOp = VK_LOGIC_OP_COPY; - color_blend_ci.attachmentCount = static_cast<u32>(num_attachments); - color_blend_ci.pAttachments = cb_attachments.data(); - std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants)); - - std::vector dynamic_states = { + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = static_cast<u32>(num_attachments), + .pAttachments = cb_attachments.data(), + }; + + std::vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }; if (device.IsExtExtendedDynamicStateSupported()) { - static constexpr std::array extended = { + static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT, @@ -389,18 +413,19 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } - VkPipelineDynamicStateCreateInfo dynamic_state_ci; - dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_ci.pNext = nullptr; - dynamic_state_ci.flags = 0; - dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); - dynamic_state_ci.pDynamicStates = dynamic_states.data(); + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast<u32>(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; - VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; - subgroup_size_ci.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - subgroup_size_ci.pNext = nullptr; - subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; std::vector<VkPipelineShaderStageCreateInfo> shader_stages; std::size_t module_index = 0; @@ -408,6 +433,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa if (!program[stage]) { continue; } + VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; stage_ci.pNext = nullptr; @@ -422,26 +448,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa } } - VkGraphicsPipelineCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.stageCount = static_cast<u32>(shader_stages.size()); - ci.pStages = shader_stages.data(); - ci.pVertexInputState = &vertex_input_ci; - ci.pInputAssemblyState = &input_assembly_ci; - ci.pTessellationState = &tessellation_ci; - ci.pViewportState = &viewport_ci; - ci.pRasterizationState = &rasterization_ci; - ci.pMultisampleState = &multisample_ci; - ci.pDepthStencilState = &depth_stencil_ci; - ci.pColorBlendState = &color_blend_ci; - ci.pDynamicState = &dynamic_state_ci; - ci.layout = *layout; - ci.renderPass = renderpass; - ci.subpass = 0; - ci.basePipelineHandle = nullptr; - ci.basePipelineIndex = 0; + const VkGraphicsPipelineCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }; return device.GetLogical().CreateGraphicsPipeline(ci); } diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 9bceb3861..1c418ea17 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -102,21 +102,29 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num void VKImage::CreatePresentView() { // Image type has to be 2D to be presented. - VkImageViewCreateInfo image_view_ci; - image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - image_view_ci.pNext = nullptr; - image_view_ci.flags = 0; - image_view_ci.image = *image; - image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_ci.format = format; - image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - image_view_ci.subresourceRange.aspectMask = aspect_mask; - image_view_ci.subresourceRange.baseMipLevel = 0; - image_view_ci.subresourceRange.levelCount = 1; - image_view_ci.subresourceRange.baseArrayLayer = 0; - image_view_ci.subresourceRange.layerCount = 1; - present_view = device.GetLogical().CreateImageView(image_view_ci); + present_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); } VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index b4c650a63..24c8960ac 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -178,13 +178,12 @@ bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 t }(); // Try to allocate found type. - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = size; - memory_ai.memoryTypeIndex = type; - - vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai); + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = size, + .memoryTypeIndex = type, + }); if (!memory) { LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); return false; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3da835324..42b3a744c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -88,12 +88,13 @@ void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& bindi // Combined image samplers can be arrayed. count = container[i].size; } - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = count; - entry.stageFlags = stage_flags; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = count, + .stageFlags = stage_flags, + .pImmutableSamplers = nullptr, + }); } } @@ -259,10 +260,10 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } } - Specialization specialization; - specialization.workgroup_size = key.workgroup_size; - specialization.shared_memory_size = key.shared_memory_size; - + const Specialization specialization{ + .workgroup_size = key.workgroup_size, + .shared_memory_size = key.shared_memory_size, + }; const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, shader->GetRegistry(), specialization), shader->GetEntries()}; @@ -370,13 +371,14 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { for (u32 i = 0; i < count; ++i) { const u32 num_samplers = container[i].size; - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = num_samplers; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = num_samplers, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); ++binding; offset += num_samplers * entry_size; @@ -389,22 +391,24 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 // Nvidia has a bug where updating multiple texels at once causes the driver to crash. // Note: Fixed in driver Windows 443.24, Linux 440.66.15 for (u32 i = 0; i < count; ++i) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding + i; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = descriptor_type; - entry.offset = static_cast<std::size_t>(offset + i * entry_size); - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding + i, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = descriptor_type, + .offset = static_cast<std::size_t>(offset + i * entry_size), + .stride = entry_size, + }); } } else if (count > 0) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = count; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = count, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); } offset += count * entry_size; binding += count; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index bc91c48cc..6cd63d090 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -47,14 +47,14 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { void QueryPool::Allocate(std::size_t begin, std::size_t end) { usage.resize(end); - VkQueryPoolCreateInfo query_pool_ci; - query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - query_pool_ci.pNext = nullptr; - query_pool_ci.flags = 0; - query_pool_ci.queryType = GetTarget(type); - query_pool_ci.queryCount = static_cast<u32>(end - begin); - query_pool_ci.pipelineStatistics = 0; - pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci)); + pools.push_back(device->GetLogical().CreateQueryPool({ + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queryType = GetTarget(type), + .queryCount = static_cast<u32>(end - begin), + .pipelineStatistics = 0, + })); } void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7625871c2..31e44aa2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -64,20 +64,22 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - VkViewport viewport; - viewport.x = src.translate_x - src.scale_x; - viewport.y = src.translate_y - src.scale_y; - viewport.width = width != 0.0f ? width : 1.0f; - viewport.height = height != 0.0f ? height : 1.0f; + VkViewport viewport{ + .x = src.translate_x - src.scale_x, + .y = src.translate_y - src.scale_y, + .width = width != 0.0f ? width : 1.0f, + .height = height != 0.0f ? height : 1.0f, + .minDepth = src.translate_z - src.scale_z * reduce_z, + .maxDepth = src.translate_z + src.scale_z, + }; - const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - viewport.minDepth = src.translate_z - src.scale_z * reduce_z; - viewport.maxDepth = src.translate_z + src.scale_z; if (!device.IsExtDepthRangeUnrestrictedSupported()) { viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); } + return viewport; } @@ -508,10 +510,11 @@ void RasterizerVulkan::Clear() { const u32 color_attachment = regs.clear_buffers.RT; scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { - VkClearAttachment attachment; - attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - attachment.colorAttachment = color_attachment; - attachment.clearValue = clear_value; + const VkClearAttachment attachment{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = color_attachment, + .clearValue = clear_value, + }; cmdbuf.ClearAttachments(attachment, clear_rect); }); } @@ -551,13 +554,16 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { query_cache.UpdateCounters(); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - ComputePipelineCacheKey key; - key.shader = code_addr; - key.shared_memory_size = launch_desc.shared_alloc; - key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z}; - - auto& pipeline = pipeline_cache.GetComputePipeline(key); + auto& pipeline = pipeline_cache.GetComputePipeline({ + .shader = code_addr, + .shared_memory_size = launch_desc.shared_alloc, + .workgroup_size = + { + launch_desc.block_dim_x, + launch_desc.block_dim_y, + launch_desc.block_dim_z, + }, + }); // Compute dispatches can't be executed inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); @@ -841,17 +847,17 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - VkFramebufferCreateInfo framebuffer_ci; - framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_ci.pNext = nullptr; - framebuffer_ci.flags = 0; - framebuffer_ci.renderPass = key.renderpass; - framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size()); - framebuffer_ci.pAttachments = key.views.data(); - framebuffer_ci.width = key.width; - framebuffer_ci.height = key.height; - framebuffer_ci.layers = key.layers; - framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci); + framebuffer = device.GetLogical().CreateFramebuffer({ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = key.renderpass, + .attachmentCount = static_cast<u32>(key.views.size()), + .pAttachments = key.views.data(), + .width = key.width, + .height = key.height, + .layers = key.layers, + }); } return {*framebuffer, VkExtent2D{key.width, key.height}}; @@ -1553,17 +1559,17 @@ VkBuffer RasterizerVulkan::DefaultBuffer() { return *default_buffer; } - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = DEFAULT_BUFFER_SIZE; - ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - default_buffer = device.GetLogical().CreateBuffer(ci); + default_buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = DEFAULT_BUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); default_buffer_commit = memory_manager.Commit(default_buffer, false); scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 3f71d005e..80284cf92 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -39,10 +39,14 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { using namespace VideoCore::Surface; + const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); + std::vector<VkAttachmentDescription> descriptors; + descriptors.reserve(num_attachments); + std::vector<VkAttachmentReference> color_references; + color_references.reserve(num_attachments); - const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); for (std::size_t rt = 0; rt < num_attachments; ++rt) { const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); @@ -54,20 +58,22 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - descriptor.initialLayout = color_layout; - descriptor.finalLayout = color_layout; - - VkAttachmentReference& reference = color_references.emplace_back(); - reference.attachment = static_cast<u32>(rt); - reference.layout = color_layout; + descriptors.push_back({ + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = color_layout, + .finalLayout = color_layout, + }); + + color_references.push_back({ + .attachment = static_cast<u32>(rt), + .layout = color_layout, + }); } VkAttachmentReference zeta_attachment_ref; @@ -82,32 +88,36 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout zeta_layout = params.zeta_texception != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = 0; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.initialLayout = zeta_layout; - descriptor.finalLayout = zeta_layout; - - zeta_attachment_ref.attachment = static_cast<u32>(num_attachments); - zeta_attachment_ref.layout = zeta_layout; + descriptors.push_back({ + .flags = 0, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = zeta_layout, + .finalLayout = zeta_layout, + }); + + zeta_attachment_ref = { + .attachment = static_cast<u32>(num_attachments), + .layout = zeta_layout, + }; } - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size()); - subpass_description.pColorAttachments = color_references.data(); - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; + const VkSubpassDescription subpass_description{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast<u32>(color_references.size()), + .pColorAttachments = color_references.data(), + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; VkAccessFlags access = 0; VkPipelineStageFlags stage = 0; @@ -122,26 +132,27 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; } - VkSubpassDependency subpass_dependency; - subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL; - subpass_dependency.dstSubpass = 0; - subpass_dependency.srcStageMask = stage; - subpass_dependency.dstStageMask = stage; - subpass_dependency.srcAccessMask = 0; - subpass_dependency.dstAccessMask = access; - subpass_dependency.dependencyFlags = 0; - - VkRenderPassCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.attachmentCount = static_cast<u32>(descriptors.size()); - ci.pAttachments = descriptors.data(); - ci.subpassCount = 1; - ci.pSubpasses = &subpass_description; - ci.dependencyCount = 1; - ci.pDependencies = &subpass_dependency; - return device.GetLogical().CreateRenderPass(ci); + const VkSubpassDependency subpass_dependency{ + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = stage, + .dstStageMask = stage, + .srcAccessMask = 0, + .dstAccessMask = access, + .dependencyFlags = 0, + }; + + return device.GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptors.size()), + .pAttachments = descriptors.data(), + .subpassCount = 1, + .pSubpasses = &subpass_description, + .dependencyCount = 1, + .pDependencies = &subpass_dependency, + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index dc06f545a..f19330a36 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -18,33 +18,32 @@ namespace { constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; constexpr std::size_t FENCES_GROW_STEP = 0x40; -VkFenceCreateInfo BuildFenceCreateInfo() { - VkFenceCreateInfo fence_ci; - fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_ci.pNext = nullptr; - fence_ci.flags = 0; - return fence_ci; +constexpr VkFenceCreateInfo BuildFenceCreateInfo() { + return { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; } } // Anonymous namespace class CommandBufferPool final : public VKFencedPool { public: - CommandBufferPool(const VKDevice& device) + explicit CommandBufferPool(const VKDevice& device) : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} void Allocate(std::size_t begin, std::size_t end) override { // Command buffers are going to be commited, recorded, executed every single usage cycle. // They are also going to be reseted when commited. - VkCommandPoolCreateInfo command_pool_ci; - command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - command_pool_ci.pNext = nullptr; - command_pool_ci.flags = - VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily(); - Pool& pool = pools.emplace_back(); - pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci); + pool.handle = device.GetLogical().CreateCommandPool({ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = device.GetGraphicsFamily(), + }); pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); } diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 616eacc36..2d5460776 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -44,32 +44,35 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); const std::array color = tsc.GetBorderColor(); - VkSamplerCustomBorderColorCreateInfoEXT border; - border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; - border.pNext = nullptr; - border.format = VK_FORMAT_UNDEFINED; + VkSamplerCustomBorderColorCreateInfoEXT border{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .pNext = nullptr, + .format = VK_FORMAT_UNDEFINED, + }; std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - VkSamplerCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - ci.pNext = arbitrary_borders ? &border : nullptr; - ci.flags = 0; - ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); - ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter); - ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); - ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter); - ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter); - ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter); - ci.mipLodBias = tsc.GetLodBias(); - ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE; - ci.maxAnisotropy = tsc.GetMaxAnisotropy(); - ci.compareEnable = tsc.depth_compare_enabled; - ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); - ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(); - ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(); - ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); - ci.unnormalizedCoordinates = VK_FALSE; - return device.GetLogical().CreateSampler(ci); + return device.GetLogical().CreateSampler({ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = arbitrary_borders ? &border : nullptr, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.GetLodBias(), + .anisotropyEnable = + static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = tsc.GetMaxAnisotropy(), + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), + .borderColor = + arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); } VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 56524e6f3..dbbd0961a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -100,16 +100,19 @@ void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer frame state.framebuffer = framebuffer; state.render_area = render_area; - VkRenderPassBeginInfo renderpass_bi; - renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderpass_bi.pNext = nullptr; - renderpass_bi.renderPass = renderpass; - renderpass_bi.framebuffer = framebuffer; - renderpass_bi.renderArea.offset.x = 0; - renderpass_bi.renderArea.offset.y = 0; - renderpass_bi.renderArea.extent = render_area; - renderpass_bi.clearValueCount = 0; - renderpass_bi.pClearValues = nullptr; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { if (end_renderpass) { @@ -157,16 +160,17 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { current_cmdbuf.End(); - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.pWaitDstStageMask = nullptr; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = current_cmdbuf.address(); - submit_info.signalSemaphoreCount = semaphore ? 1 : 0; - submit_info.pSignalSemaphores = &semaphore; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = current_cmdbuf.address(), + .signalSemaphoreCount = semaphore ? 1U : 0U, + .pSignalSemaphores = &semaphore, + }; switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) { case VK_SUCCESS: break; @@ -181,19 +185,18 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { void VKScheduler::AllocateNewContext() { ++ticks; - VkCommandBufferBeginInfo cmdbuf_bi; - cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmdbuf_bi.pNext = nullptr; - cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - cmdbuf_bi.pInheritanceInfo = nullptr; - std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), device.GetDispatchLoader()); - current_cmdbuf.Begin(cmdbuf_bi); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 112df9c71..c1a218d76 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -19,13 +19,13 @@ vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, cons const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); std::memcpy(data.get(), code_data, code_size); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code_size; - ci.pCode = data.get(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = data.get(), + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 45c180221..5eca0ab91 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -71,20 +71,19 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { const u32 log2 = Common::Log2Ceil64(size); - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = 1ULL << log2; - ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - auto buffer = std::make_unique<VKBuffer>(); - buffer->handle = device.GetLogical().CreateBuffer(ci); + buffer->handle = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 1ULL << log2, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); buffer->commit = memory_manager.Commit(buffer->handle, host_visible); auto& entries = GetCache(host_visible)[log2].entries; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index e5a583dd5..9151d9fb1 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -158,6 +158,7 @@ void StateTracker::Initialize() { SetupDirtyFrontFace(tables); SetupDirtyPrimitiveTopology(tables); SetupDirtyStencilOp(tables); + SetupDirtyStencilTestEnable(tables); } void StateTracker::InvalidateCommandBufferState() { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 2d28a6c47..a5526a3f5 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -122,30 +122,27 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { // Substract from the preferred heap size some bytes to avoid getting out of memory. const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024; - - VkBufferCreateInfo buffer_ci; - buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_ci.pNext = nullptr; - buffer_ci.flags = 0; - buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); - buffer_ci.usage = usage; - buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_ci.queueFamilyIndexCount = 0; - buffer_ci.pQueueFamilyIndices = nullptr; - - buffer = device.GetLogical().CreateBuffer(buffer_ci); + buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); const u32 required_flags = requirements.memoryTypeBits; stream_buffer_size = static_cast<u64>(requirements.size); - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = requirements.size; - memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); - - memory = device.GetLogical().AllocateMemory(memory_ai); + memory = device.GetLogical().AllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = requirements.size, + .memoryTypeIndex = GetMemoryType(memory_properties, required_flags), + }); buffer.BindMemory(*memory, 0); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index bffd8f32a..c25e312b6 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -95,15 +95,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { const auto present_queue{device.GetPresentQueue()}; bool recreated = false; - VkPresentInfoKHR present_info; - present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present_info.pNext = nullptr; - present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U; - present_info.pWaitSemaphores = semaphores.data(); - present_info.swapchainCount = 1; - present_info.pSwapchains = swapchain.address(); - present_info.pImageIndices = &image_index; - present_info.pResults = nullptr; + const VkPresentInfoKHR present_info{ + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreCount = render_semaphore ? 2U : 1U, + .pWaitSemaphores = semaphores.data(), + .swapchainCount = 1, + .pSwapchains = swapchain.address(), + .pImageIndices = &image_index, + .pResults = nullptr, + }; switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: @@ -147,24 +148,25 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci; - swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - swapchain_ci.pNext = nullptr; - swapchain_ci.flags = 0; - swapchain_ci.surface = surface; - swapchain_ci.minImageCount = requested_image_count; - swapchain_ci.imageFormat = surface_format.format; - swapchain_ci.imageColorSpace = surface_format.colorSpace; - swapchain_ci.imageArrayLayers = 1; - swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - swapchain_ci.queueFamilyIndexCount = 0; - swapchain_ci.pQueueFamilyIndices = nullptr; - swapchain_ci.preTransform = capabilities.currentTransform; - swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - swapchain_ci.presentMode = present_mode; - swapchain_ci.clipped = VK_FALSE; - swapchain_ci.oldSwapchain = nullptr; + VkSwapchainCreateInfoKHR swapchain_ci{ + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .surface = surface, + .minImageCount = requested_image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageArrayLayers = 1, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .preTransform = capabilities.currentTransform, + .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + .presentMode = present_mode, + .clipped = VK_FALSE, + .oldSwapchain = nullptr, + }; const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; @@ -173,8 +175,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT; swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); - } else { - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; } // Request the size again to reduce the possibility of a TOCTOU race condition. @@ -200,20 +200,28 @@ void VKSwapchain::CreateSemaphores() { } void VKSwapchain::CreateImageViews() { - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - // ci.image - ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - ci.format = image_format; - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - ci.subresourceRange.baseMipLevel = 0; - ci.subresourceRange.levelCount = 1; - ci.subresourceRange.baseArrayLayer = 0; - ci.subresourceRange.layerCount = 1; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; image_views.resize(image_count); for (std::size_t i = 0; i < image_count; i++) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 29c53a5fa..d102e6d27 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -95,17 +95,18 @@ VkImageViewType GetImageViewType(SurfaceTarget target) { vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(host_memory_size); - ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - return device.GetLogical().CreateBuffer(ci); + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(host_memory_size), + .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); } VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, @@ -113,15 +114,16 @@ VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, std::size_t host_memory_size) { ASSERT(params.IsBuffer()); - VkBufferViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.buffer = buffer; - ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - ci.offset = 0; - ci.range = static_cast<VkDeviceSize>(host_memory_size); - return ci; + return { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = buffer, + .format = + MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, + .offset = 0, + .range = static_cast<VkDeviceSize>(host_memory_size), + }; } VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { @@ -130,23 +132,23 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP const auto [format, attachable, storage] = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); - VkImageCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.imageType = SurfaceTargetToImage(params.target); - ci.format = format; - ci.mipLevels = params.num_levels; - ci.arrayLayers = static_cast<u32>(params.GetNumLayers()); - ci.samples = VK_SAMPLE_COUNT_1_BIT; - ci.tiling = VK_IMAGE_TILING_OPTIMAL; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + VkImageCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = SurfaceTargetToImage(params.target), + .format = format, + .mipLevels = params.num_levels, + .arrayLayers = static_cast<u32>(params.GetNumLayers()), + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; if (attachable) { ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -281,12 +283,10 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.pNext = nullptr; - barrier.srcAccessMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - barrier.dstAccessMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; - barrier.srcQueueFamilyIndex = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstQueueFamilyIndex = VK_ACCESS_SHADER_READ_BIT; - barrier.srcQueueFamilyIndex = 0; - barrier.dstQueueFamilyIndex = 0; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.buffer = dst_buffer; barrier.offset = 0; barrier.size = size; @@ -323,22 +323,25 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { } VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - VkBufferImageCopy copy; - copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted); - copy.bufferRowLength = 0; - copy.bufferImageHeight = 0; - copy.imageSubresource.aspectMask = image->GetAspectMask(); - copy.imageSubresource.mipLevel = level; - copy.imageSubresource.baseArrayLayer = 0; - copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers()); - copy.imageOffset.x = 0; - copy.imageOffset.y = 0; - copy.imageOffset.z = 0; - copy.imageExtent.width = params.GetMipWidth(level); - copy.imageExtent.height = params.GetMipHeight(level); - copy.imageExtent.depth = - params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; - return copy; + return { + .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = image->GetAspectMask(), + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = static_cast<u32>(params.GetNumLayers()), + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = params.GetMipWidth(level), + .height = params.GetMipHeight(level), + .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, + }, + }; } VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { @@ -418,20 +421,29 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc ASSERT(num_slices == params.depth); } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.viewType = image_view_type; - ci.format = surface.GetImage().GetFormat(); - ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]}; - ci.subresourceRange.aspectMask = aspect; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; - ci.subresourceRange.baseArrayLayer = base_layer; - ci.subresourceRange.layerCount = num_layers; - image_view = device.GetLogical().CreateImageView(ci); + image_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .viewType = image_view_type, + .format = surface.GetImage().GetFormat(), + .components = + { + .r = swizzle[0], + .g = swizzle[1], + .b = swizzle[2], + .a = swizzle[3], + }, + .subresourceRange = + { + .aspectMask = aspect, + .baseMipLevel = base_level, + .levelCount = num_levels, + .baseArrayLayer = base_layer, + .layerCount = num_layers, + }, + }); return last_image_view = *image_view; } @@ -441,17 +453,26 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.format = surface.GetImage().GetFormat(); - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = aspect_mask; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .format = surface.GetImage().GetFormat(), + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = base_level, + .levelCount = num_levels, + }, + }; if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; ci.subresourceRange.baseArrayLayer = base_slice; @@ -504,24 +525,40 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkImageCopy copy; - copy.srcSubresource.aspectMask = src_surface->GetAspectMask(); - copy.srcSubresource.mipLevel = copy_params.source_level; - copy.srcSubresource.baseArrayLayer = copy_params.source_z; - copy.srcSubresource.layerCount = num_layers; - copy.srcOffset.x = copy_params.source_x; - copy.srcOffset.y = copy_params.source_y; - copy.srcOffset.z = 0; - copy.dstSubresource.aspectMask = dst_surface->GetAspectMask(); - copy.dstSubresource.mipLevel = copy_params.dest_level; - copy.dstSubresource.baseArrayLayer = dst_base_layer; - copy.dstSubresource.layerCount = num_layers; - copy.dstOffset.x = copy_params.dest_x; - copy.dstOffset.y = copy_params.dest_y; - copy.dstOffset.z = dst_offset_z; - copy.extent.width = copy_params.width; - copy.extent.height = copy_params.height; - copy.extent.depth = extent_z; + const VkImageCopy copy{ + .srcSubresource = + { + .aspectMask = src_surface->GetAspectMask(), + .mipLevel = copy_params.source_level, + .baseArrayLayer = copy_params.source_z, + .layerCount = num_layers, + }, + .srcOffset = + { + .x = static_cast<s32>(copy_params.source_x), + .y = static_cast<s32>(copy_params.source_y), + .z = 0, + }, + .dstSubresource = + { + .aspectMask = dst_surface->GetAspectMask(), + .mipLevel = copy_params.dest_level, + .baseArrayLayer = dst_base_layer, + .layerCount = num_layers, + }, + .dstOffset = + { + .x = static_cast<s32>(copy_params.dest_x), + .y = static_cast<s32>(copy_params.dest_y), + .z = static_cast<s32>(dst_offset_z), + }, + .extent = + { + .width = copy_params.width, + .height = copy_params.height, + .depth = extent_z, + }, + }; const VkImage src_image = src_surface->GetImageHandle(); const VkImage dst_image = dst_surface->GetImageHandle(); diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 051298cc8..14cac38ea 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -377,24 +377,26 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions, InstanceDispatch& dld) noexcept { - VkApplicationInfo application_info; - application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - application_info.pNext = nullptr; - application_info.pApplicationName = "yuzu Emulator"; - application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.pEngineName = "yuzu Emulator"; - application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.apiVersion = VK_API_VERSION_1_1; - - VkInstanceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.pApplicationInfo = &application_info; - ci.enabledLayerCount = layers.size(); - ci.ppEnabledLayerNames = layers.data(); - ci.enabledExtensionCount = extensions.size(); - ci.ppEnabledExtensionNames = extensions.data(); + static constexpr VkApplicationInfo application_info{ + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = nullptr, + .pApplicationName = "yuzu Emulator", + .applicationVersion = VK_MAKE_VERSION(0, 1, 0), + .pEngineName = "yuzu Emulator", + .engineVersion = VK_MAKE_VERSION(0, 1, 0), + .apiVersion = VK_API_VERSION_1_1, + }; + + const VkInstanceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .pApplicationInfo = &application_info, + .enabledLayerCount = layers.size(), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = extensions.size(), + .ppEnabledExtensionNames = extensions.data(), + }; VkInstance instance; if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { @@ -425,19 +427,20 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices( DebugCallback Instance::TryCreateDebugCallback( PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { - VkDebugUtilsMessengerCreateInfoEXT ci; - ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - ci.pNext = nullptr; - ci.flags = 0; - ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; - ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; - ci.pfnUserCallback = callback; - ci.pUserData = nullptr; + const VkDebugUtilsMessengerCreateInfoEXT ci{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = callback, + .pUserData = nullptr, + }; VkDebugUtilsMessengerEXT messenger; if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { @@ -468,12 +471,13 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c } CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { - VkCommandBufferAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.commandPool = handle; - ai.level = level; - ai.commandBufferCount = static_cast<u32>(num_buffers); + const VkCommandBufferAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = nullptr, + .commandPool = handle, + .level = level, + .commandBufferCount = static_cast<u32>(num_buffers), + }; std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers); switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { @@ -497,17 +501,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, DeviceDispatch& dld) noexcept { - VkDeviceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - ci.pNext = next; - ci.flags = 0; - ci.queueCreateInfoCount = queues_ci.size(); - ci.pQueueCreateInfos = queues_ci.data(); - ci.enabledLayerCount = 0; - ci.ppEnabledLayerNames = nullptr; - ci.enabledExtensionCount = enabled_extensions.size(); - ci.ppEnabledExtensionNames = enabled_extensions.data(); - ci.pEnabledFeatures = nullptr; + const VkDeviceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = next, + .flags = 0, + .queueCreateInfoCount = queues_ci.size(), + .pQueueCreateInfos = queues_ci.data(), + .enabledLayerCount = 0, + .ppEnabledLayerNames = nullptr, + .enabledExtensionCount = enabled_extensions.size(), + .ppEnabledExtensionNames = enabled_extensions.data(), + .pEnabledFeatures = nullptr, + }; VkDevice device; if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { @@ -548,10 +553,11 @@ ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { } Semaphore Device::CreateSemaphore() const { - VkSemaphoreCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkSemaphoreCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; VkSemaphore object; Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); @@ -639,10 +645,12 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons } Event Device::CreateEvent() const { - VkEventCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkEventCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + VkEvent object; Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); return Event(object, handle, *dld); diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..b7f66d7ee --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp @@ -0,0 +1,181 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> +#include <condition_variable> +#include <mutex> +#include <thread> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/shader/async_shaders.h" + +namespace VideoCommon::Shader { + +AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} + +AsyncShaders::~AsyncShaders() { + KillWorkers(); +} + +void AsyncShaders::AllocateWorkers(std::size_t num_workers) { + // If we're already have workers queued or don't want to queue workers, ignore + if (num_workers == worker_threads.size() || num_workers == 0) { + return; + } + + // If workers already exist, clear them + if (!worker_threads.empty()) { + FreeWorkers(); + } + + // Create workers + for (std::size_t i = 0; i < num_workers; i++) { + context_list.push_back(emu_window.CreateSharedContext()); + worker_threads.push_back(std::move( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); + } +} + +void AsyncShaders::FreeWorkers() { + // Mark all threads to quit + is_thread_exiting.store(true); + cv.notify_all(); + for (auto& thread : worker_threads) { + thread.join(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +void AsyncShaders::KillWorkers() { + is_thread_exiting.store(true); + for (auto& thread : worker_threads) { + thread.detach(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +bool AsyncShaders::HasWorkQueued() { + return !pending_queue.empty(); +} + +bool AsyncShaders::HasCompletedWork() { + std::shared_lock lock{completed_mutex}; + return !finished_work.empty(); +} + +bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { + const auto& regs = gpu.Maxwell3D().regs; + + // If something is using depth, we can assume that games are not rendering anything which will + // be used one time. + if (regs.zeta_enable) { + return true; + } + + // If games are using a small index count, we can assume these are full screen quads. Usually + // these shaders are only used once for building textures so we can assume they can't be built + // async + if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { + return false; + } + + return true; +} + +std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { + std::vector<AsyncShaders::Result> results; + { + std::unique_lock lock{completed_mutex}; + results.assign(std::make_move_iterator(finished_work.begin()), + std::make_move_iterator(finished_work.end())); + finished_work.clear(); + } + return results; +} + +void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, + Tegra::Engines::ShaderType shader_type, u64 uid, + std::vector<u64> code, std::vector<u64> code_b, + u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, + VAddr cpu_addr) { + WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM + : AsyncShaders::Backend::OpenGL, + device, + shader_type, + uid, + std::move(code), + std::move(code_b), + main_offset, + compiler_settings, + registry, + cpu_addr}; + std::unique_lock lock(queue_mutex); + pending_queue.push_back(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { + using namespace std::chrono_literals; + while (!is_thread_exiting.load(std::memory_order_relaxed)) { + std::unique_lock lock{queue_mutex}; + cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); + if (is_thread_exiting) { + return; + } + + // Partial lock to allow all threads to read at the same time + if (!HasWorkQueued()) { + continue; + } + // Another thread beat us, just unlock and wait for the next load + if (pending_queue.empty()) { + continue; + } + // Pull work from queue + WorkerParams work = std::move(pending_queue.front()); + pending_queue.pop_front(); + + lock.unlock(); + + if (work.backend == AsyncShaders::Backend::OpenGL || + work.backend == AsyncShaders::Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); + const auto scope = context->Acquire(); + auto program = + OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); + Result result{}; + result.backend = work.backend; + result.cpu_address = work.cpu_address; + result.uid = work.uid; + result.code = std::move(work.code); + result.code_b = std::move(work.code_b); + result.shader_type = work.shader_type; + + if (work.backend == AsyncShaders::Backend::OpenGL) { + result.program.opengl = std::move(program->source_program); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + result.program.glasm = std::move(program->assembly_program); + } + + { + std::unique_lock complete_lock(completed_mutex); + finished_work.push_back(std::move(result)); + } + } + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..2f5ee94ad --- /dev/null +++ b/src/video_core/shader/async_shaders.h @@ -0,0 +1,109 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <condition_variable> +#include <deque> +#include <memory> +#include <shared_mutex> +#include <thread> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" + +namespace Core::Frontend { +class EmuWindow; +class GraphicsContext; +} // namespace Core::Frontend + +namespace Tegra { +class GPU; +} + +namespace VideoCommon::Shader { + +class AsyncShaders { +public: + enum class Backend { + OpenGL, + GLASM, + }; + + struct ResultPrograms { + OpenGL::OGLProgram opengl; + OpenGL::OGLAssemblyProgram glasm; + }; + + struct Result { + u64 uid; + VAddr cpu_address; + Backend backend; + ResultPrograms program; + std::vector<u64> code; + std::vector<u64> code_b; + Tegra::Engines::ShaderType shader_type; + }; + + explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); + ~AsyncShaders(); + + /// Start up shader worker threads + void AllocateWorkers(std::size_t num_workers); + + /// Clear the shader queue and kill all worker threads + void FreeWorkers(); + + // Force end all threads + void KillWorkers(); + + /// Check to see if any shaders have actually been compiled + bool HasCompletedWork(); + + /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build + /// every shader async as some shaders are only built and executed once. We try to "guess" which + /// shader would be used only once + bool IsShaderAsync(const Tegra::GPU& gpu) const; + + /// Pulls completed compiled shaders + std::vector<Result> GetCompletedWork(); + + void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, + u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + +private: + void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); + + /// Check our worker queue to see if we have any work queued already + bool HasWorkQueued(); + + struct WorkerParams { + AsyncShaders::Backend backend; + OpenGL::Device device; + Tegra::Engines::ShaderType shader_type; + u64 uid; + std::vector<u64> code; + std::vector<u64> code_b; + u32 main_offset; + VideoCommon::Shader::CompilerSettings compiler_settings; + VideoCommon::Shader::Registry registry; + VAddr cpu_address; + }; + + std::condition_variable cv; + std::mutex queue_mutex; + std::shared_mutex completed_mutex; + std::atomic<bool> is_thread_exiting{}; + std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; + std::vector<std::thread> worker_threads; + std::deque<WorkerParams> pending_queue; + std::vector<AsyncShaders::Result> finished_work; + Core::Frontend::EmuWindow& emu_window; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index b7608fc7b..015a789d6 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -209,11 +209,11 @@ private: } // Remove them from the cache - const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) { + const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) { return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != removed_shaders.end(); }; - storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end()); + std::erase_if(storage, is_removed); } /// @brief Creates a new entry in the lookup cache and returns its pointer diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp new file mode 100644 index 000000000..c3c71657d --- /dev/null +++ b/src/video_core/shader_notify.cpp @@ -0,0 +1,42 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/shader_notify.h" + +using namespace std::chrono_literals; + +namespace VideoCore { +namespace { +constexpr auto UPDATE_TICK = 32ms; +} + +ShaderNotify::ShaderNotify() = default; +ShaderNotify::~ShaderNotify() = default; + +std::size_t ShaderNotify::GetShadersBuilding() { + const auto now = std::chrono::high_resolution_clock::now(); + const auto diff = now - last_update; + if (diff > UPDATE_TICK) { + std::shared_lock lock(mutex); + last_updated_count = accurate_count; + } + return last_updated_count; +} + +std::size_t ShaderNotify::GetShadersBuildingAccurate() { + std::shared_lock lock{mutex}; + return accurate_count; +} + +void ShaderNotify::MarkShaderComplete() { + std::unique_lock lock{mutex}; + accurate_count--; +} + +void ShaderNotify::MarkSharderBuilding() { + std::unique_lock lock{mutex}; + accurate_count++; +} + +} // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h new file mode 100644 index 000000000..a9c92d179 --- /dev/null +++ b/src/video_core/shader_notify.h @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <chrono> +#include <shared_mutex> +#include "common/common_types.h" + +namespace VideoCore { +class ShaderNotify { +public: + ShaderNotify(); + ~ShaderNotify(); + + std::size_t GetShadersBuilding(); + std::size_t GetShadersBuildingAccurate(); + + void MarkShaderComplete(); + void MarkSharderBuilding(); + +private: + std::size_t last_updated_count{}; + std::size_t accurate_count{}; + std::shared_mutex mutex; + std::chrono::high_resolution_clock::time_point last_update{}; +}; +} // namespace VideoCore diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 1803fce15..9e5fe2374 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -343,8 +343,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } if (is_tiled && is_layered) { - return Common::AlignBits(size, - Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); + return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); } return size; } @@ -418,7 +417,7 @@ std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { const u32 block_size = GetBlockSize(); const u32 block_index = offset / block_size; const u32 gob_offset = offset % block_size; - const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize()); + const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE); const u32 x_gob_pixels = 64U / GetBytesPerPixel(); const u32 x_block_pixels = x_gob_pixels << block_width; const u32 y_block_pixels = 8U << block_height; diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 24957df8d..118aa689e 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -204,7 +204,7 @@ public: static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { return Common::AlignBits(out_size, - Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); + Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); } /// Converts a width from a type of surface into another. This helps represent the diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 0cd17cb89..474ae620a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -6,6 +6,7 @@ #include <cstring> #include "common/alignment.h" #include "common/assert.h" +#include "common/bit_util.h" #include "video_core/gpu.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" @@ -37,20 +38,10 @@ struct alignas(64) SwizzleTable { std::array<std::array<u16, M>, N> values{}; }; -constexpr u32 gob_size_x_shift = 6; -constexpr u32 gob_size_y_shift = 3; -constexpr u32 gob_size_z_shift = 0; -constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; +constexpr u32 FAST_SWIZZLE_ALIGN = 16; -constexpr u32 gob_size_x = 1U << gob_size_x_shift; -constexpr u32 gob_size_y = 1U << gob_size_y_shift; -constexpr u32 gob_size_z = 1U << gob_size_z_shift; -constexpr u32 gob_size = 1U << gob_size_shift; - -constexpr u32 fast_swizzle_align = 16; - -constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); -constexpr auto fast_swizzle_table = SwizzleTable<gob_size_y, 4, fast_swizzle_align>(); +constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); +constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); /** * This function manages ALL the GOBs(Group of Bytes) Inside a single block. @@ -69,17 +60,17 @@ void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, con u32 y_address = z_address; u32 pixel_base = layer_z * z + y_start * stride_x; for (u32 y = y_start; y < y_end; y++) { - const auto& table = legacy_swizzle_table[y % gob_size_y]; + const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; for (u32 x = x_start; x < x_end; x++) { - const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]}; + const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; data_ptrs[unswizzle] = swizzled_data + swizzle_offset; data_ptrs[!unswizzle] = unswizzled_data + pixel_index; std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); } pixel_base += stride_x; - if ((y + 1) % gob_size_y == 0) - y_address += gob_size; + if ((y + 1) % GOB_SIZE_Y == 0) + y_address += GOB_SIZE; } z_address += xy_block_size; } @@ -104,18 +95,18 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const u32 y_address = z_address; u32 pixel_base = layer_z * z + y_start * stride_x; for (u32 y = y_start; y < y_end; y++) { - const auto& table = fast_swizzle_table[y % gob_size_y]; - for (u32 xb = x_startb; xb < x_endb; xb += fast_swizzle_align) { - const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; + const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; + for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { + const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; const u32 pixel_index{out_x + pixel_base}; data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; - std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); + std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); } pixel_base += stride_x; - if ((y + 1) % gob_size_y == 0) - y_address += gob_size; + if ((y + 1) % GOB_SIZE_Y == 0) + y_address += GOB_SIZE; } z_address += xy_block_size; } @@ -138,9 +129,9 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; const u32 stride_x = width * out_bytes_per_pixel; const u32 layer_z = height * stride_x; - const u32 gob_elements_x = gob_size_x / bytes_per_pixel; - constexpr u32 gob_elements_y = gob_size_y; - constexpr u32 gob_elements_z = gob_size_z; + const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; + constexpr u32 gob_elements_y = GOB_SIZE_Y; + constexpr u32 gob_elements_z = GOB_SIZE_Z; const u32 block_x_elements = gob_elements_x; const u32 block_y_elements = gob_elements_y * block_height; const u32 block_z_elements = gob_elements_z * block_depth; @@ -148,7 +139,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); const u32 blocks_on_y = div_ceil(height, block_y_elements); const u32 blocks_on_z = div_ceil(depth, block_z_elements); - const u32 xy_block_size = gob_size * block_height; + const u32 xy_block_size = GOB_SIZE * block_height; const u32 block_size = xy_block_size * block_depth; u32 tile_offset = 0; for (u32 zb = 0; zb < blocks_on_z; zb++) { @@ -182,7 +173,7 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { const u32 block_height_size{1U << block_height}; const u32 block_depth_size{1U << block_depth}; - if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { + if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, bytes_per_pixel, out_bytes_per_pixel, block_height_size, block_depth_size, width_spacing); @@ -212,25 +203,26 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, + u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit, u32 offset_x, u32 offset_y) { const u32 block_height = 1U << block_height_bit; - const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / - gob_size_x}; + const u32 image_width_in_gobs = + (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X; for (u32 line = 0; line < subrect_height; ++line) { const u32 dst_y = line + offset_y; const u32 gob_address_y = - (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size; - const auto& table = legacy_swizzle_table[dst_y % gob_size_y]; + (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + + ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; + const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 dst_x = x + offset_x; const u32 gob_address = - gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height; - const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x]; - u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; - u8* dest_addr = swizzled_data + swizzled_offset; + gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height; + const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X]; + const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel; + const u8* const source_line = unswizzled_data + unswizzled_offset; + u8* const dest_addr = swizzled_data + swizzled_offset; std::memcpy(dest_addr, source_line, bytes_per_pixel); } } @@ -242,14 +234,15 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 const u32 block_height = 1U << block_height_bit; for (u32 line = 0; line < subrect_height; ++line) { const u32 y2 = line + offset_y; - const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + - ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; - const auto& table = legacy_swizzle_table[y2 % gob_size_y]; + const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height + + ((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; + const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 x2 = (x + offset_x) * bytes_per_pixel; - const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; - const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; - u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; + const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height; + const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X]; + const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel; + u8* dest_line = unswizzled_data + unswizzled_offset; u8* source_addr = swizzled_data + swizzled_offset; std::memcpy(dest_line, source_addr, bytes_per_pixel); @@ -257,21 +250,48 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 } } +void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, + u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, + u32 origin_y, u8* output, const u8* input) { + UNIMPLEMENTED_IF(origin_x > 0); + UNIMPLEMENTED_IF(origin_y > 0); + + const u32 stride = width * bytes_per_pixel; + const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); + + const u32 block_height_mask = (1U << block_height) - 1; + const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth)); + + for (u32 line = 0; line < line_count; ++line) { + const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; + const u32 block_y = line / GOB_SIZE_Y; + const u32 dst_offset_y = + (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; + for (u32 x = 0; x < line_length_in; ++x) { + const u32 dst_offset = + ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; + const u32 src_offset = x * bytes_per_pixel + line * pitch; + std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel); + } + } +} + void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, u8* swizzle_data) { const u32 block_height = 1U << block_height_bit; - const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; + const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X}; std::size_t count = 0; for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { const std::size_t gob_address_y = - (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; - const auto& table = legacy_swizzle_table[y % gob_size_y]; + (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + + ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; + const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = - gob_address_y + (x / gob_size_x) * gob_size * block_height; - const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; + gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; + const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X]; const u8* source_line = source_data + count; u8* dest_addr = swizzle_data + swizzled_offset; count++; @@ -284,9 +304,9 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { - const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); - const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); - const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); + const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT); + const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height); + const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth); return aligned_width * aligned_height * aligned_depth; } else { return width * height * depth * bytes_per_pixel; @@ -297,14 +317,14 @@ u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, u32 bytes_per_pixel) { auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; const u32 gobs_in_block = 1 << block_height; - const u32 y_blocks = gob_size_y << block_height; - const u32 x_per_gob = gob_size_x / bytes_per_pixel; + const u32 y_blocks = GOB_SIZE_Y << block_height; + const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel; const u32 x_blocks = div_ceil(width, x_per_gob); - const u32 block_size = gob_size * gobs_in_block; + const u32 block_size = GOB_SIZE * gobs_in_block; const u32 stride = block_size * x_blocks; const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; const u32 relative_y = dst_y % y_blocks; - return base + (relative_y / gob_size_y) * gob_size; + return base + (relative_y / GOB_SIZE_Y) * GOB_SIZE; } } // namespace Tegra::Texture diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 4c9a15048..d6fe35d37 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -10,15 +10,15 @@ namespace Tegra::Texture { -// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents -// an small rect of (64/bytes_per_pixel)X8. -inline std::size_t GetGOBSize() { - return 512; -} +constexpr u32 GOB_SIZE_X = 64; +constexpr u32 GOB_SIZE_Y = 8; +constexpr u32 GOB_SIZE_Z = 1; +constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; -inline std::size_t GetGOBSizeShift() { - return 9; -} +constexpr std::size_t GOB_SIZE_X_SHIFT = 6; +constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; +constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; +constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; /// Unswizzles a swizzled texture without changing its format. void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, @@ -44,14 +44,32 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height /// Copies an untiled subrectangle into a tiled surface. void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, - u32 offset_x, u32 offset_y); + u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, + u32 block_height_bit, u32 offset_x, u32 offset_y); /// Copies a tiled subrectangle into a linear surface. void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 offset_x, u32 offset_y); +/// @brief Swizzles a 2D array of pixels into a 3D texture +/// @param line_length_in Number of pixels per line +/// @param line_count Number of lines +/// @param pitch Number of bytes per line +/// @param width Width of the swizzled texture +/// @param height Height of the swizzled texture +/// @param bytes_per_pixel Number of bytes used per pixel +/// @param block_height Block height shift +/// @param block_depth Block depth shift +/// @param origin_x Column offset in pixels of the swizzled texture +/// @param origin_y Row offset in pixels of the swizzled texture +/// @param output Pointer to the pixels of the swizzled texture +/// @param input Pointer to the 2D array of pixels used as input +/// @pre input and output points to an array large enough to hold the number of bytes used +void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, + u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, + u32 origin_y, u8* output, const u8* input); + void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, std::size_t copy_size, const u8* source_data, u8* swizzle_data); |
