aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2020-03-14 09:48:15 -0400
committerGitHub <noreply@github.com>2020-03-14 09:48:15 -0400
commit35145bd529c3517e2c366efc764a762092d96edf (patch)
tree58c80a2133092b990ca11f3a357d70fab2c5fd0b /src/video_core/renderer_vulkan
parent666d431ad8ee4e36f1b7f48d13f3fa63ba3675f2 (diff)
parent69c7a01f88a1839a3d950cab968accfa5100ea18 (diff)
Merge pull request #3490 from ReinUsesLisp/transform-feedbacks
video_core: Initial implementation of transform feedbacks
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp47
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h45
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp42
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp138
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h13
8 files changed, 236 insertions, 74 deletions
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 886bde3b9..3847bd722 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
- features.shaderStorageImageReadWithoutFormat =
- is_shader_storage_img_read_without_format_supported;
+ features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
+ vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
+ if (ext_transform_feedback) {
+ transform_feedback.transformFeedback = true;
+ transform_feedback.geometryStreams = true;
+ SetNext(next, transform_feedback);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
+ }
+
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
- extensions.reserve(14);
+ extensions.reserve(15);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
- bool khr_shader_float16_int8{};
- bool ext_subgroup_size_control{};
+ bool has_khr_shader_float16_int8{};
+ bool has_ext_subgroup_size_control{};
+ bool has_ext_transform_feedback{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
- Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+ Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
+ false);
Test(extension, ext_depth_range_unrestricted,
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, ext_shader_viewport_index_layer,
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
- Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+ Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+ false);
+ Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
}
- if (khr_shader_float16_int8) {
+ if (has_khr_shader_float16_int8) {
is_float16_supported =
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
- if (ext_subgroup_size_control) {
+ if (has_ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
is_warp_potentially_bigger = true;
}
+ if (has_ext_transform_feedback) {
+ const auto features =
+ GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
+ const auto properties =
+ GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
+
+ if (features.transformFeedback && features.geometryStreams &&
+ properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
+ properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
+ extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
+ ext_transform_feedback = true;
+ }
+ }
+
return extensions;
}
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
- is_shader_storage_img_read_without_format_supported =
- supported_features.shaderStorageImageReadWithoutFormat;
+ is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 2c27ad730..6e656517f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,11 +122,6 @@ public:
return properties.limits.maxPushConstantsSize;
}
- /// Returns true if Shader storage Image Read Without Format supported.
- bool IsShaderStorageImageReadWithoutFormatSupported() const {
- return is_shader_storage_img_read_without_format_supported;
- }
-
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
}
+ /// Returns true if formatless image load is supported.
+ bool IsFormatlessImageLoadSupported() const {
+ return is_formatless_image_load_supported;
+ }
+
/// Returns true if the device supports VK_EXT_scalar_block_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
return ext_shader_viewport_index_layer;
}
+ /// Returns true if the device supports VK_EXT_transform_feedback.
+ bool IsExtTransformFeedbackSupported() const {
+ return ext_transform_feedback;
+ }
+
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
bool IsNvDeviceDiagnosticCheckpoints() const {
return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
- const vk::PhysicalDevice physical; ///< Physical device.
- vk::DispatchLoaderDynamic dld; ///< Device function pointers.
- vk::PhysicalDeviceProperties properties; ///< Device properties.
- UniqueDevice logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 graphics_family{}; ///< Main graphics queue family index.
- u32 present_family{}; ///< Main present queue family index.
- vk::DriverIdKHR driver_id{}; ///< Driver ID.
- vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
- bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool is_float16_supported{}; ///< Support for float16 arithmetics.
- bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
+ const vk::PhysicalDevice physical; ///< Physical device.
+ vk::DispatchLoaderDynamic dld; ///< Device function pointers.
+ vk::PhysicalDeviceProperties properties; ///< Device properties.
+ UniqueDevice logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ vk::DriverIdKHR driver_id{}; ///< Driver ID.
+ vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
+ bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
+ bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
- bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
- ///< image read without format
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index ebf85f311..056ef495c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
- const SPIRVShader spirv_shader{
- Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
- shader->GetEntries()};
+ const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
+ shader->GetRegistry(), specialization),
+ shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
@@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
- specialization.primitive_topology = fixed_state.input_assembly.topology;
- if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
+ if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
@@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
- specialization.tessellation.primitive = fixed_state.tessellation.primitive;
- specialization.tessellation.spacing = fixed_state.tessellation.spacing;
- specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
- program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
- entries};
+ program[stage] = {
+ Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
+ entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e292526bb..21340c9a4 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -132,6 +132,10 @@ public:
return shader_ir;
}
+ const VideoCommon::Shader::Registry& GetRegistry() const {
+ return registry;
+ }
+
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2bcb17b56..f889019c1 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
[&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
}
+ BeginTransformFeedback();
+
const auto pipeline_layout = pipeline.GetLayout();
const auto descriptor_set = pipeline.CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
}
draw_params.Draw(cmdbuf, dld);
});
+
+ EndTransformFeedback();
}
void RasterizerVulkan::Clear() {
@@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateStencilFaces(regs);
}
+void RasterizerVulkan::BeginTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
+
+ UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
+ UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
+ UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
+
+ const auto& binding = regs.tfb_bindings[0];
+ UNIMPLEMENTED_IF(binding.buffer_enable == 0);
+ UNIMPLEMENTED_IF(binding.buffer_offset != 0);
+
+ const GPUVAddr gpu_addr = binding.Address();
+ const std::size_t size = binding.buffer_size;
+ const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+
+ scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
+ cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
+ cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
+ });
+}
+
+void RasterizerVulkan::EndTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ scheduler.Record(
+ [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
+}
+
void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
BufferBindings& buffer_bindings) {
const auto& regs = system.GPU().Maxwell3D().regs;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 96ea05f0a..b2e73d98d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -169,6 +169,10 @@ private:
void UpdateDynamicStates();
+ void BeginTransformFeedback();
+
+ void EndTransformFeedback();
+
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index cfcca5af0..b2c298051 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -5,7 +5,9 @@
#include <functional>
#include <limits>
#include <map>
+#include <optional>
#include <type_traits>
+#include <unordered_map>
#include <utility>
#include <fmt/format.h>
@@ -24,6 +26,7 @@
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
namespace Vulkan {
@@ -93,6 +96,12 @@ struct VertexIndices {
std::optional<u32> clip_distances;
};
+struct GenericVaryingDescription {
+ Id id = nullptr;
+ u32 first_element = 0;
+ bool is_scalar = false;
+};
+
spv::Dim GetSamplerDim(const Sampler& sampler) {
ASSERT(!sampler.IsBuffer());
switch (sampler.GetType()) {
@@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) {
class SPIRVDecompiler final : public Sirit::Module {
public:
explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
- const Specialization& specialization)
+ const Registry& registry, const Specialization& specialization)
: Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
- specialization{specialization} {
+ registry{registry}, specialization{specialization} {
+ if (stage != ShaderType::Compute) {
+ transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+ }
+
AddCapability(spv::Capability::Shader);
AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
AddCapability(spv::Capability::ImageQuery);
@@ -286,6 +299,15 @@ public:
AddExtension("SPV_KHR_variable_pointers");
AddExtension("SPV_KHR_shader_draw_parameters");
+ if (!transform_feedback.empty()) {
+ if (device.IsExtTransformFeedbackSupported()) {
+ AddCapability(spv::Capability::TransformFeedback);
+ } else {
+ LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
+ "supported on this device");
+ }
+ }
+
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
if (ir.UsesViewportIndex()) {
AddCapability(spv::Capability::MultiViewport);
@@ -296,7 +318,7 @@ public:
}
}
- if (device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ if (device.IsFormatlessImageLoadSupported()) {
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
}
@@ -318,25 +340,29 @@ public:
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common2.threads_per_input_primitive);
break;
- case ShaderType::TesselationEval:
+ case ShaderType::TesselationEval: {
+ const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Tessellation);
AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive));
- AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing));
- AddExecutionMode(main, specialization.tessellation.clockwise
+ AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
+ AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
+ AddExecutionMode(main, info.tessellation_clockwise
? spv::ExecutionMode::VertexOrderCw
: spv::ExecutionMode::VertexOrderCcw);
break;
- case ShaderType::Geometry:
+ }
+ case ShaderType::Geometry: {
+ const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Geometry);
AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology));
+ AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common4.max_output_vertices);
// TODO(Rodrigo): Where can we get this info from?
AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
break;
+ }
case ShaderType::Fragment:
AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -545,7 +571,8 @@ private:
if (stage != ShaderType::Geometry) {
return;
}
- const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
+ const auto& info = registry.GetGraphicsInfo();
+ const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
DeclareInputVertexArray(num_input);
DeclareOutputVertex();
}
@@ -742,12 +769,34 @@ private:
}
void DeclareOutputAttributes() {
+ if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
for (const auto index : ir.GetOutputAttributes()) {
if (!IsGenericAttribute(index)) {
continue;
}
- const u32 location = GetGenericAttributeLocation(index);
- Id type = t_float4;
+ DeclareOutputAttribute(index);
+ }
+ }
+
+ void DeclareOutputAttribute(Attribute::Index index) {
+ static constexpr std::string_view swizzle = "xyzw";
+
+ const u32 location = GetGenericAttributeLocation(index);
+ u8 element = 0;
+ while (element < 4) {
+ const std::size_t remainder = 4 - element;
+
+ std::size_t num_components = remainder;
+ const std::optional tfb = GetTransformFeedbackInfo(index, element);
+ if (tfb) {
+ num_components = tfb->components;
+ }
+
+ Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
Id varying_default = v_varying_default;
if (IsOutputAttributeArray()) {
const u32 num = GetNumOutputVertices();
@@ -760,13 +809,45 @@ private:
}
type = TypePointer(spv::StorageClass::Output, type);
+ std::string name = fmt::format("out_attr{}", location);
+ if (num_components < 4 || element > 0) {
+ name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
+ }
+
const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
- Name(AddGlobalVariable(id), fmt::format("out_attr{}", location));
- output_attributes.emplace(index, id);
+ Name(AddGlobalVariable(id), name);
+
+ GenericVaryingDescription description;
+ description.id = id;
+ description.first_element = element;
+ description.is_scalar = num_components == 1;
+ for (u32 i = 0; i < num_components; ++i) {
+ const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
+ output_attributes.emplace(offset, description);
+ }
interfaces.push_back(id);
Decorate(id, spv::Decoration::Location, location);
+ if (element > 0) {
+ Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
+ }
+ if (tfb && device.IsExtTransformFeedbackSupported()) {
+ Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
+ Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
+ Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
+ }
+
+ element += static_cast<u8>(num_components);
+ }
+ }
+
+ std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
}
+ return it->second;
}
u32 DeclareConstantBuffers(u32 binding) {
@@ -898,7 +979,7 @@ private:
u32 GetNumInputVertices() const {
switch (stage) {
case ShaderType::Geometry:
- return GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
+ return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
case ShaderType::TesselationControl:
case ShaderType::TesselationEval:
return NumInputPatches;
@@ -1346,8 +1427,14 @@ private:
}
default:
if (IsGenericAttribute(attribute)) {
- const Id composite = output_attributes.at(attribute);
- return {ArrayPass(t_out_float, composite, {element}), Type::Float};
+ const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
+ const GenericVaryingDescription description = output_attributes.at(offset);
+ const Id composite = description.id;
+ std::vector<u32> indices;
+ if (!description.is_scalar) {
+ indices.push_back(element - description.first_element);
+ }
+ return {ArrayPass(t_out_float, composite, indices), Type::Float};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
static_cast<u32>(attribute));
@@ -1793,7 +1880,7 @@ private:
}
Expression ImageLoad(Operation operation) {
- if (!device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ if (!device.IsFormatlessImageLoadSupported()) {
return {v_float_zero, Type::Float};
}
@@ -2258,11 +2345,11 @@ private:
std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
switch (type) {
case Type::Float:
- return {nullptr, t_float2, t_float3, t_float4};
+ return {t_float, t_float2, t_float3, t_float4};
case Type::Int:
- return {nullptr, t_int2, t_int3, t_int4};
+ return {t_int, t_int2, t_int3, t_int4};
case Type::Uint:
- return {nullptr, t_uint2, t_uint3, t_uint4};
+ return {t_uint, t_uint2, t_uint3, t_uint4};
default:
UNIMPLEMENTED();
return {};
@@ -2495,7 +2582,9 @@ private:
const ShaderIR& ir;
const ShaderType stage;
const Tegra::Shader::Header header;
+ const Registry& registry;
const Specialization& specialization;
+ std::unordered_map<u8, VaryingTFB> transform_feedback;
const Id t_void = Name(TypeVoid(), "void");
@@ -2584,7 +2673,7 @@ private:
Id shared_memory{};
std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
std::map<Attribute::Index, Id> input_attributes;
- std::map<Attribute::Index, Id> output_attributes;
+ std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
std::map<u32, TexelBuffer> texel_buffers;
@@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
- ShaderType stage, const Specialization& specialization) {
- return SPIRVDecompiler(device, ir, stage, specialization).Assemble();
+ ShaderType stage, const VideoCommon::Shader::Registry& registry,
+ const Specialization& specialization) {
+ return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f5dc14d9e..ffea4709e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,6 +15,7 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
u32 shared_memory_size{};
// Graphics specific
- Maxwell::PrimitiveTopology primitive_topology{};
std::optional<float> point_size{};
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
-
- // Tessellation specific
- struct {
- Maxwell::TessellationPrimitive primitive{};
- Maxwell::TessellationSpacing spacing{};
- bool clockwise{};
- } tessellation;
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage, const Specialization& specialization);
+ Tegra::Engines::ShaderType stage,
+ const VideoCommon::Shader::Registry& registry,
+ const Specialization& specialization);
} // namespace Vulkan