diff options
| author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2020-03-14 09:48:15 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-03-14 09:48:15 -0400 |
| commit | 35145bd529c3517e2c366efc764a762092d96edf (patch) | |
| tree | 58c80a2133092b990ca11f3a357d70fab2c5fd0b /src/video_core/renderer_vulkan | |
| parent | 666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2 (diff) | |
| parent | 69c7a01f88a1839a3d950cab968accfa5100ea18 (diff) | |
Merge pull request #3490 from ReinUsesLisp/transform-feedbacks
video_core: Initial implementation of transform feedbacks
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.cpp | 47 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.h | 45 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 138 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.h | 13 |
8 files changed, 236 insertions, 74 deletions
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 886bde3b9..3847bd722 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan features.occlusionQueryPrecise = true; features.fragmentStoresAndAtomics = true; features.shaderImageGatherExtended = true; - features.shaderStorageImageReadWithoutFormat = - is_shader_storage_img_read_without_format_supported; + features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; features.shaderStorageImageWriteWithoutFormat = true; features.textureCompressionASTC_LDR = is_optimal_astc_supported; @@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); } + vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; + if (ext_transform_feedback) { + transform_feedback.transformFeedback = true; + transform_feedback.geometryStreams = true; + SetNext(next, transform_feedback); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } }; - extensions.reserve(14); + extensions.reserve(15); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); @@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami [[maybe_unused]] const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); - bool khr_shader_float16_int8{}; - bool ext_subgroup_size_control{}; + bool has_khr_shader_float16_int8{}; + bool has_ext_subgroup_size_control{}; + bool has_ext_transform_feedback{}; for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { Test(extension, khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); - Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, + false); Test(extension, ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); Test(extension, ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); - Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, + Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, + false); + Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { Test(extension, nv_device_diagnostic_checkpoints, @@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami } } - if (khr_shader_float16_int8) { + if (has_khr_shader_float16_int8) { is_float16_supported = GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } - if (ext_subgroup_size_control) { + if (has_ext_subgroup_size_control) { const auto features = GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); const auto properties = @@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami is_warp_potentially_bigger = true; } + if (has_ext_transform_feedback) { + const auto features = + GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); + const auto properties = + GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); + + if (features.transformFeedback && features.geometryStreams && + properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && + properties.transformFeedbackQueries && properties.transformFeedbackDraw) { + extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); + ext_transform_feedback = true; + } + } + return extensions; } @@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { const auto supported_features{physical.getFeatures(dldi)}; - is_shader_storage_img_read_without_format_supported = - supported_features.shaderStorageImageReadWithoutFormat; + is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 2c27ad730..6e656517f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,11 +122,6 @@ public: return properties.limits.maxPushConstantsSize; } - /// Returns true if Shader storage Image Read Without Format supported. - bool IsShaderStorageImageReadWithoutFormatSupported() const { - return is_shader_storage_img_read_without_format_supported; - } - /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -147,6 +142,11 @@ public: return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; } + /// Returns true if formatless image load is supported. + bool IsFormatlessImageLoadSupported() const { + return is_formatless_image_load_supported; + } + /// Returns true if the device supports VK_EXT_scalar_block_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -167,6 +167,11 @@ public: return ext_shader_viewport_index_layer; } + /// Returns true if the device supports VK_EXT_transform_feedback. + bool IsExtTransformFeedbackSupported() const { + return ext_transform_feedback; + } + /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. bool IsNvDeviceDiagnosticCheckpoints() const { return nv_device_diagnostic_checkpoints; @@ -214,26 +219,26 @@ private: static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device. - vk::DispatchLoaderDynamic dld; ///< Device function pointers. - vk::PhysicalDeviceProperties properties; ///< Device properties. - UniqueDevice logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - vk::DriverIdKHR driver_id{}; ///< Driver ID. - vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetics. - bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + const vk::PhysicalDevice physical; ///< Physical device. + vk::DispatchLoaderDynamic dld; ///< Device function pointers. + vk::PhysicalDeviceProperties properties; ///< Device properties. + UniqueDevice logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + vk::DriverIdKHR driver_id{}; ///< Driver ID. + vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. + bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. - bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage - ///< image read without format // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ebf85f311..056ef495c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach specialization.workgroup_size = key.workgroup_size; specialization.shared_memory_size = key.shared_memory_size; - const SPIRVShader spirv_shader{ - Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), - shader->GetEntries()}; + const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, + shader->GetRegistry(), specialization), + shader->GetEntries()}; entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, update_descriptor_queue, spirv_shader); return *entry; @@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto& gpu = system.GPU().Maxwell3D(); Specialization specialization; - specialization.primitive_topology = fixed_state.input_assembly.topology; - if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { + if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { ASSERT(fixed_state.input_assembly.point_size != 0.0f); specialization.point_size = fixed_state.input_assembly.point_size; } @@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; } specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; - specialization.tessellation.primitive = fixed_state.tessellation.primitive; - specialization.tessellation.spacing = fixed_state.tessellation.spacing; - specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; SPIRVProgram program; std::vector<vk::DescriptorSetLayoutBinding> bindings; @@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 const auto program_type = GetShaderType(program_enum); const auto& entries = shader->GetEntries(); - program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), - entries}; + program[stage] = { + Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), + entries}; if (program_enum == Maxwell::ShaderProgram::VertexA) { // VertexB was combined with VertexA, so we skip the VertexB iteration diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e292526bb..21340c9a4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -132,6 +132,10 @@ public: return shader_ir; } + const VideoCommon::Shader::Registry& GetRegistry() const { + return registry; + } + const VideoCommon::Shader::ShaderIR& GetIR() const { return shader_ir; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2bcb17b56..f889019c1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); } + BeginTransformFeedback(); + const auto pipeline_layout = pipeline.GetLayout(); const auto descriptor_set = pipeline.CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { @@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { } draw_params.Draw(cmdbuf, dld); }); + + EndTransformFeedback(); } void RasterizerVulkan::Clear() { @@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateStencilFaces(regs); } +void RasterizerVulkan::BeginTransformFeedback() { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { + return; + } + + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); + + UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); + UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); + UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); + + const auto& binding = regs.tfb_bindings[0]; + UNIMPLEMENTED_IF(binding.buffer_enable == 0); + UNIMPLEMENTED_IF(binding.buffer_offset != 0); + + const GPUVAddr gpu_addr = binding.Address(); + const std::size_t size = binding.buffer_size; + const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + + scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { + cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); + cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); + }); +} + +void RasterizerVulkan::EndTransformFeedback() { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { + return; + } + + scheduler.Record( + [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); +} + void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, BufferBindings& buffer_bindings) { const auto& regs = system.GPU().Maxwell3D().regs; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 96ea05f0a..b2e73d98d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -169,6 +169,10 @@ private: void UpdateDynamicStates(); + void BeginTransformFeedback(); + + void EndTransformFeedback(); + bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index cfcca5af0..b2c298051 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -5,7 +5,9 @@ #include <functional> #include <limits> #include <map> +#include <optional> #include <type_traits> +#include <unordered_map> #include <utility> #include <fmt/format.h> @@ -24,6 +26,7 @@ #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" +#include "video_core/shader/transform_feedback.h" namespace Vulkan { @@ -93,6 +96,12 @@ struct VertexIndices { std::optional<u32> clip_distances; }; +struct GenericVaryingDescription { + Id id = nullptr; + u32 first_element = 0; + bool is_scalar = false; +}; + spv::Dim GetSamplerDim(const Sampler& sampler) { ASSERT(!sampler.IsBuffer()); switch (sampler.GetType()) { @@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) { class SPIRVDecompiler final : public Sirit::Module { public: explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, - const Specialization& specialization) + const Registry& registry, const Specialization& specialization) : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, - specialization{specialization} { + registry{registry}, specialization{specialization} { + if (stage != ShaderType::Compute) { + transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); + } + AddCapability(spv::Capability::Shader); AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); AddCapability(spv::Capability::ImageQuery); @@ -286,6 +299,15 @@ public: AddExtension("SPV_KHR_variable_pointers"); AddExtension("SPV_KHR_shader_draw_parameters"); + if (!transform_feedback.empty()) { + if (device.IsExtTransformFeedbackSupported()) { + AddCapability(spv::Capability::TransformFeedback); + } else { + LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " + "supported on this device"); + } + } + if (ir.UsesLayer() || ir.UsesViewportIndex()) { if (ir.UsesViewportIndex()) { AddCapability(spv::Capability::MultiViewport); @@ -296,7 +318,7 @@ public: } } - if (device.IsShaderStorageImageReadWithoutFormatSupported()) { + if (device.IsFormatlessImageLoadSupported()) { AddCapability(spv::Capability::StorageImageReadWithoutFormat); } @@ -318,25 +340,29 @@ public: AddExecutionMode(main, spv::ExecutionMode::OutputVertices, header.common2.threads_per_input_primitive); break; - case ShaderType::TesselationEval: + case ShaderType::TesselationEval: { + const auto& info = registry.GetGraphicsInfo(); AddCapability(spv::Capability::Tessellation); AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); - AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); - AddExecutionMode(main, specialization.tessellation.clockwise + AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); + AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); + AddExecutionMode(main, info.tessellation_clockwise ? spv::ExecutionMode::VertexOrderCw : spv::ExecutionMode::VertexOrderCcw); break; - case ShaderType::Geometry: + } + case ShaderType::Geometry: { + const auto& info = registry.GetGraphicsInfo(); AddCapability(spv::Capability::Geometry); AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); + AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); AddExecutionMode(main, spv::ExecutionMode::OutputVertices, header.common4.max_output_vertices); // TODO(Rodrigo): Where can we get this info from? AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); break; + } case ShaderType::Fragment: AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); @@ -545,7 +571,8 @@ private: if (stage != ShaderType::Geometry) { return; } - const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); + const auto& info = registry.GetGraphicsInfo(); + const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); DeclareInputVertexArray(num_input); DeclareOutputVertex(); } @@ -742,12 +769,34 @@ private: } void DeclareOutputAttributes() { + if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { + return; + } + + UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); for (const auto index : ir.GetOutputAttributes()) { if (!IsGenericAttribute(index)) { continue; } - const u32 location = GetGenericAttributeLocation(index); - Id type = t_float4; + DeclareOutputAttribute(index); + } + } + + void DeclareOutputAttribute(Attribute::Index index) { + static constexpr std::string_view swizzle = "xyzw"; + + const u32 location = GetGenericAttributeLocation(index); + u8 element = 0; + while (element < 4) { + const std::size_t remainder = 4 - element; + + std::size_t num_components = remainder; + const std::optional tfb = GetTransformFeedbackInfo(index, element); + if (tfb) { + num_components = tfb->components; + } + + Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); Id varying_default = v_varying_default; if (IsOutputAttributeArray()) { const u32 num = GetNumOutputVertices(); @@ -760,13 +809,45 @@ private: } type = TypePointer(spv::StorageClass::Output, type); + std::string name = fmt::format("out_attr{}", location); + if (num_components < 4 || element > 0) { + name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); + } + const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); - Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); - output_attributes.emplace(index, id); + Name(AddGlobalVariable(id), name); + + GenericVaryingDescription description; + description.id = id; + description.first_element = element; + description.is_scalar = num_components == 1; + for (u32 i = 0; i < num_components; ++i) { + const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); + output_attributes.emplace(offset, description); + } interfaces.push_back(id); Decorate(id, spv::Decoration::Location, location); + if (element > 0) { + Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); + } + if (tfb && device.IsExtTransformFeedbackSupported()) { + Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); + Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); + Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); + } + + element += static_cast<u8>(num_components); + } + } + + std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { + const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); + const auto it = transform_feedback.find(location); + if (it == transform_feedback.end()) { + return {}; } + return it->second; } u32 DeclareConstantBuffers(u32 binding) { @@ -898,7 +979,7 @@ private: u32 GetNumInputVertices() const { switch (stage) { case ShaderType::Geometry: - return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); + return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); case ShaderType::TesselationControl: case ShaderType::TesselationEval: return NumInputPatches; @@ -1346,8 +1427,14 @@ private: } default: if (IsGenericAttribute(attribute)) { - const Id composite = output_attributes.at(attribute); - return {ArrayPass(t_out_float, composite, {element}), Type::Float}; + const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); + const GenericVaryingDescription description = output_attributes.at(offset); + const Id composite = description.id; + std::vector<u32> indices; + if (!description.is_scalar) { + indices.push_back(element - description.first_element); + } + return {ArrayPass(t_out_float, composite, indices), Type::Float}; } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); @@ -1793,7 +1880,7 @@ private: } Expression ImageLoad(Operation operation) { - if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { + if (!device.IsFormatlessImageLoadSupported()) { return {v_float_zero, Type::Float}; } @@ -2258,11 +2345,11 @@ private: std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { switch (type) { case Type::Float: - return {nullptr, t_float2, t_float3, t_float4}; + return {t_float, t_float2, t_float3, t_float4}; case Type::Int: - return {nullptr, t_int2, t_int3, t_int4}; + return {t_int, t_int2, t_int3, t_int4}; case Type::Uint: - return {nullptr, t_uint2, t_uint3, t_uint4}; + return {t_uint, t_uint2, t_uint3, t_uint4}; default: UNIMPLEMENTED(); return {}; @@ -2495,7 +2582,9 @@ private: const ShaderIR& ir; const ShaderType stage; const Tegra::Shader::Header header; + const Registry& registry; const Specialization& specialization; + std::unordered_map<u8, VaryingTFB> transform_feedback; const Id t_void = Name(TypeVoid(), "void"); @@ -2584,7 +2673,7 @@ private: Id shared_memory{}; std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; std::map<Attribute::Index, Id> input_attributes; - std::map<Attribute::Index, Id> output_attributes; + std::unordered_map<u8, GenericVaryingDescription> output_attributes; std::map<u32, Id> constant_buffers; std::map<GlobalMemoryBase, Id> global_buffers; std::map<u32, TexelBuffer> texel_buffers; @@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { } std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, - ShaderType stage, const Specialization& specialization) { - return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); + ShaderType stage, const VideoCommon::Shader::Registry& registry, + const Specialization& specialization) { + return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f5dc14d9e..ffea4709e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -15,6 +15,7 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace Vulkan { @@ -91,17 +92,9 @@ struct Specialization final { u32 shared_memory_size{}; // Graphics specific - Maxwell::PrimitiveTopology primitive_topology{}; std::optional<float> point_size{}; std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; bool ndc_minus_one_to_one{}; - - // Tessellation specific - struct { - Maxwell::TessellationPrimitive primitive{}; - Maxwell::TessellationSpacing spacing{}; - bool clockwise{}; - } tessellation; }; // Old gcc versions don't consider this trivially copyable. // static_assert(std::is_trivially_copyable_v<Specialization>); @@ -114,6 +107,8 @@ struct SPIRVShader { ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, const Specialization& specialization); + Tegra::Engines::ShaderType stage, + const VideoCommon::Shader::Registry& registry, + const Specialization& specialization); } // namespace Vulkan |
