diff options
| author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2020-05-15 01:43:44 -0300 |
|---|---|---|
| committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2020-05-21 23:18:37 -0300 |
| commit | e2b67a868b7191237374226218756c1a62fabd4e (patch) | |
| tree | 0ae75b1a89a5cdec2abf2433b20558f4aaab589b /src/video_core/renderer_vulkan | |
| parent | cf4ee279c6424ada927f74fcc34e013038af7228 (diff) | |
shader/other: Implement thread comparisons (NV_shader_thread_group)
Hardware S2R special registers match gl_Thread*MaskNV. We can trivially
implement these using Nvidia's extension on OpenGL or naively stubbing
them with the ARB instructions to match. This might cause issues if the
host device warp size doesn't match Nvidia's. That said, this is
unlikely on proper shaders.
Refer to the attached url for more documentation about these flags.
https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_thread_group.txt
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 167e20e91..f4ccc9848 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -515,6 +515,16 @@ private: void DeclareCommon() { thread_id = DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); + thread_masks[0] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); + thread_masks[1] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); + thread_masks[2] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); + thread_masks[3] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); + thread_masks[4] = + DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); } void DeclareVertex() { @@ -2175,6 +2185,13 @@ private: return {OpLoad(t_uint, thread_id), Type::Uint}; } + template <std::size_t index> + Expression ThreadMask(Operation) { + // TODO(Rodrigo): Handle devices with different warp sizes + const Id mask = thread_masks[index]; + return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; + } + Expression ShuffleIndexed(Operation operation) { const Id value = AsFloat(Visit(operation[0])); const Id index = AsUint(Visit(operation[1])); @@ -2639,6 +2656,11 @@ private: &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, &SPIRVDecompiler::ThreadId, + &SPIRVDecompiler::ThreadMask<0>, // Eq + &SPIRVDecompiler::ThreadMask<1>, // Ge + &SPIRVDecompiler::ThreadMask<2>, // Gt + &SPIRVDecompiler::ThreadMask<3>, // Le + &SPIRVDecompiler::ThreadMask<4>, // Lt &SPIRVDecompiler::ShuffleIndexed, &SPIRVDecompiler::MemoryBarrierGL, @@ -2763,6 +2785,7 @@ private: Id workgroup_id{}; Id local_invocation_id{}; Id thread_id{}; + std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt VertexIndices in_indices; VertexIndices out_indices; |
