From 75059c46d645e42e8da31fb97d003047c67b004b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 9 Jul 2021 13:59:09 -0400 Subject: thread_worker: Fix compile time error state is unused in the branch where with_state is false --- src/common/thread_worker.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 8272985ff..0a975a869 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -39,7 +39,7 @@ public: const auto lambda = [this, func](std::stop_token stop_token) { Common::SetCurrentThreadName(thread_name.c_str()); { - std::conditional_t state{func()}; + [[maybe_unused]] std::conditional_t state{func()}; while (!stop_token.stop_requested()) { Task task; { -- cgit v1.2.3 From c67d64365a712830fe140dd36e24e2efd9b8a812 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 20:52:12 -0300 Subject: shader: Remove old shader management --- CMakeModules/GenerateSCMRev.cmake | 56 +- externals/Vulkan-Headers | 2 +- externals/sirit | 2 +- src/common/CMakeLists.txt | 56 +- src/video_core/CMakeLists.txt | 64 - .../engines/const_buffer_engine_interface.h | 103 - src/video_core/engines/kepler_compute.cpp | 44 +- src/video_core/engines/kepler_compute.h | 20 +- src/video_core/engines/maxwell_3d.cpp | 38 - src/video_core/engines/maxwell_3d.h | 20 +- src/video_core/guest_driver.cpp | 37 - src/video_core/guest_driver.h | 46 - src/video_core/rasterizer_interface.h | 16 +- .../renderer_opengl/gl_arb_decompiler.cpp | 2124 -------------- src/video_core/renderer_opengl/gl_arb_decompiler.h | 29 - src/video_core/renderer_opengl/gl_rasterizer.cpp | 314 +- src/video_core/renderer_opengl/gl_rasterizer.h | 33 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 564 +--- src/video_core/renderer_opengl/gl_shader_cache.h | 102 +- .../renderer_opengl/gl_shader_decompiler.cpp | 2986 -------------------- 
.../renderer_opengl/gl_shader_decompiler.h | 69 - .../renderer_opengl/gl_shader_disk_cache.cpp | 482 ---- .../renderer_opengl/gl_shader_disk_cache.h | 176 -- src/video_core/renderer_vulkan/blit_image.cpp | 1 - .../renderer_vulkan/vk_compute_pipeline.cpp | 136 +- .../renderer_vulkan/vk_compute_pipeline.h | 47 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 484 ---- .../renderer_vulkan/vk_graphics_pipeline.h | 103 - .../renderer_vulkan/vk_pipeline_cache.cpp | 375 +-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 91 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 361 +-- src/video_core/renderer_vulkan/vk_rasterizer.h | 47 +- src/video_core/shader/ast.cpp | 752 ----- src/video_core/shader/ast.h | 398 --- src/video_core/shader/async_shaders.cpp | 234 -- src/video_core/shader/async_shaders.h | 138 - src/video_core/shader/compiler_settings.cpp | 26 - src/video_core/shader/compiler_settings.h | 26 - src/video_core/shader/control_flow.cpp | 751 ----- src/video_core/shader/control_flow.h | 117 - src/video_core/shader/decode.cpp | 368 --- src/video_core/shader/decode/arithmetic.cpp | 166 -- src/video_core/shader/decode/arithmetic_half.cpp | 101 - .../shader/decode/arithmetic_half_immediate.cpp | 54 - .../shader/decode/arithmetic_immediate.cpp | 53 - .../shader/decode/arithmetic_integer.cpp | 375 --- .../shader/decode/arithmetic_integer_immediate.cpp | 99 - src/video_core/shader/decode/bfe.cpp | 77 - src/video_core/shader/decode/bfi.cpp | 45 - src/video_core/shader/decode/conversion.cpp | 321 --- src/video_core/shader/decode/ffma.cpp | 62 - src/video_core/shader/decode/float_set.cpp | 58 - .../shader/decode/float_set_predicate.cpp | 57 - src/video_core/shader/decode/half_set.cpp | 115 - .../shader/decode/half_set_predicate.cpp | 80 - src/video_core/shader/decode/hfma2.cpp | 73 - src/video_core/shader/decode/image.cpp | 536 ---- src/video_core/shader/decode/integer_set.cpp | 49 - .../shader/decode/integer_set_predicate.cpp | 53 - 
src/video_core/shader/decode/memory.cpp | 493 ---- src/video_core/shader/decode/other.cpp | 322 --- .../shader/decode/predicate_set_predicate.cpp | 68 - .../shader/decode/predicate_set_register.cpp | 46 - .../shader/decode/register_set_predicate.cpp | 86 - src/video_core/shader/decode/shift.cpp | 153 - src/video_core/shader/decode/texture.cpp | 935 ------ src/video_core/shader/decode/video.cpp | 169 -- src/video_core/shader/decode/warp.cpp | 117 - src/video_core/shader/decode/xmad.cpp | 156 - src/video_core/shader/expr.cpp | 93 - src/video_core/shader/expr.h | 156 - src/video_core/shader/memory_util.cpp | 76 - src/video_core/shader/memory_util.h | 43 - src/video_core/shader/node.h | 701 ----- src/video_core/shader/node_helper.cpp | 115 - src/video_core/shader/node_helper.h | 71 - src/video_core/shader/registry.cpp | 181 -- src/video_core/shader/registry.h | 172 -- src/video_core/shader/shader_ir.cpp | 464 --- src/video_core/shader/shader_ir.h | 479 ---- src/video_core/shader/track.cpp | 236 -- src/video_core/shader/transform_feedback.cpp | 115 - src/video_core/shader/transform_feedback.h | 23 - 83 files changed, 57 insertions(+), 19625 deletions(-) delete mode 100644 src/video_core/engines/const_buffer_engine_interface.h delete mode 100644 src/video_core/guest_driver.cpp delete mode 100644 src/video_core/guest_driver.h delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.h delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/shader/ast.cpp delete mode 100644 
src/video_core/shader/ast.h delete mode 100644 src/video_core/shader/async_shaders.cpp delete mode 100644 src/video_core/shader/async_shaders.h delete mode 100644 src/video_core/shader/compiler_settings.cpp delete mode 100644 src/video_core/shader/compiler_settings.h delete mode 100644 src/video_core/shader/control_flow.cpp delete mode 100644 src/video_core/shader/control_flow.h delete mode 100644 src/video_core/shader/decode.cpp delete mode 100644 src/video_core/shader/decode/arithmetic.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_half.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_half_immediate.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_immediate.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_integer.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_integer_immediate.cpp delete mode 100644 src/video_core/shader/decode/bfe.cpp delete mode 100644 src/video_core/shader/decode/bfi.cpp delete mode 100644 src/video_core/shader/decode/conversion.cpp delete mode 100644 src/video_core/shader/decode/ffma.cpp delete mode 100644 src/video_core/shader/decode/float_set.cpp delete mode 100644 src/video_core/shader/decode/float_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/half_set.cpp delete mode 100644 src/video_core/shader/decode/half_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/hfma2.cpp delete mode 100644 src/video_core/shader/decode/image.cpp delete mode 100644 src/video_core/shader/decode/integer_set.cpp delete mode 100644 src/video_core/shader/decode/integer_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/memory.cpp delete mode 100644 src/video_core/shader/decode/other.cpp delete mode 100644 src/video_core/shader/decode/predicate_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/predicate_set_register.cpp delete mode 100644 src/video_core/shader/decode/register_set_predicate.cpp delete mode 100644 
src/video_core/shader/decode/shift.cpp delete mode 100644 src/video_core/shader/decode/texture.cpp delete mode 100644 src/video_core/shader/decode/video.cpp delete mode 100644 src/video_core/shader/decode/warp.cpp delete mode 100644 src/video_core/shader/decode/xmad.cpp delete mode 100644 src/video_core/shader/expr.cpp delete mode 100644 src/video_core/shader/expr.h delete mode 100644 src/video_core/shader/memory_util.cpp delete mode 100644 src/video_core/shader/memory_util.h delete mode 100644 src/video_core/shader/node.h delete mode 100644 src/video_core/shader/node_helper.cpp delete mode 100644 src/video_core/shader/node_helper.h delete mode 100644 src/video_core/shader/registry.cpp delete mode 100644 src/video_core/shader/registry.h delete mode 100644 src/video_core/shader/shader_ir.cpp delete mode 100644 src/video_core/shader/shader_ir.h delete mode 100644 src/video_core/shader/track.cpp delete mode 100644 src/video_core/shader/transform_feedback.cpp delete mode 100644 src/video_core/shader/transform_feedback.h (limited to 'src/common') diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 311ba1c2e..77358768e 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -51,61 +51,7 @@ endif() # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) set(VIDEO_CORE "${SRC_DIR}/src/video_core") set(HASH_FILES - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/shader/decode/arithmetic.cpp" - 
"${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" - "${VIDEO_CORE}/shader/decode/bfe.cpp" - "${VIDEO_CORE}/shader/decode/bfi.cpp" - "${VIDEO_CORE}/shader/decode/conversion.cpp" - "${VIDEO_CORE}/shader/decode/ffma.cpp" - "${VIDEO_CORE}/shader/decode/float_set.cpp" - "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/half_set.cpp" - "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/hfma2.cpp" - "${VIDEO_CORE}/shader/decode/image.cpp" - "${VIDEO_CORE}/shader/decode/integer_set.cpp" - "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/memory.cpp" - "${VIDEO_CORE}/shader/decode/texture.cpp" - "${VIDEO_CORE}/shader/decode/other.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" - "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/shift.cpp" - "${VIDEO_CORE}/shader/decode/video.cpp" - "${VIDEO_CORE}/shader/decode/warp.cpp" - "${VIDEO_CORE}/shader/decode/xmad.cpp" - "${VIDEO_CORE}/shader/ast.cpp" - "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/compiler_settings.cpp" - "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" - "${VIDEO_CORE}/shader/decode.cpp" - "${VIDEO_CORE}/shader/expr.cpp" - "${VIDEO_CORE}/shader/expr.h" - "${VIDEO_CORE}/shader/node.h" - "${VIDEO_CORE}/shader/node_helper.cpp" - "${VIDEO_CORE}/shader/node_helper.h" - "${VIDEO_CORE}/shader/registry.cpp" - "${VIDEO_CORE}/shader/registry.h" - "${VIDEO_CORE}/shader/shader_ir.cpp" - "${VIDEO_CORE}/shader/shader_ir.h" - "${VIDEO_CORE}/shader/track.cpp" - 
"${VIDEO_CORE}/shader/transform_feedback.cpp" - "${VIDEO_CORE}/shader/transform_feedback.h" + # ... ) set(COMBINED "") foreach (F IN LISTS HASH_FILES) diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers index 8188e3fbb..07c4a37bc 160000 --- a/externals/Vulkan-Headers +++ b/externals/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 8188e3fbbc105591064093440f88081fb957d4f0 +Subproject commit 07c4a37bcf41ea50aef6e98236abdfe8089fb4c6 diff --git a/externals/sirit b/externals/sirit index 200310e8f..a39596358 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 200310e8faa756b9869dd6dfc902c255246ac74a +Subproject commit a39596358a3a5488c06554c0c15184a6af71e433 diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e03fffd8d..c92266a17 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -32,61 +32,7 @@ add_custom_command(OUTPUT scm_rev.cpp DEPENDS # WARNING! It was too much work to try and make a common location for this list, # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/shader/decode/arithmetic.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" - "${VIDEO_CORE}/shader/decode/bfe.cpp" - "${VIDEO_CORE}/shader/decode/bfi.cpp" - 
"${VIDEO_CORE}/shader/decode/conversion.cpp" - "${VIDEO_CORE}/shader/decode/ffma.cpp" - "${VIDEO_CORE}/shader/decode/float_set.cpp" - "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/half_set.cpp" - "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/hfma2.cpp" - "${VIDEO_CORE}/shader/decode/image.cpp" - "${VIDEO_CORE}/shader/decode/integer_set.cpp" - "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/memory.cpp" - "${VIDEO_CORE}/shader/decode/texture.cpp" - "${VIDEO_CORE}/shader/decode/other.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" - "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/shift.cpp" - "${VIDEO_CORE}/shader/decode/video.cpp" - "${VIDEO_CORE}/shader/decode/warp.cpp" - "${VIDEO_CORE}/shader/decode/xmad.cpp" - "${VIDEO_CORE}/shader/ast.cpp" - "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/compiler_settings.cpp" - "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" - "${VIDEO_CORE}/shader/decode.cpp" - "${VIDEO_CORE}/shader/expr.cpp" - "${VIDEO_CORE}/shader/expr.h" - "${VIDEO_CORE}/shader/node.h" - "${VIDEO_CORE}/shader/node_helper.cpp" - "${VIDEO_CORE}/shader/node_helper.h" - "${VIDEO_CORE}/shader/registry.cpp" - "${VIDEO_CORE}/shader/registry.h" - "${VIDEO_CORE}/shader/shader_ir.cpp" - "${VIDEO_CORE}/shader/shader_ir.h" - "${VIDEO_CORE}/shader/track.cpp" - "${VIDEO_CORE}/shader/transform_feedback.cpp" - "${VIDEO_CORE}/shader/transform_feedback.h" + # ... 
# and also check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e4de55f4d..c5ce71706 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,7 +29,6 @@ add_library(video_core STATIC dirty_flags.h dma_pusher.cpp dma_pusher.h - engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_interface.h engines/engine_upload.cpp @@ -61,8 +60,6 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h - guest_driver.cpp - guest_driver.h memory_manager.cpp memory_manager.h query_cache.h @@ -71,8 +68,6 @@ add_library(video_core STATIC rasterizer_interface.h renderer_base.cpp renderer_base.h - renderer_opengl/gl_arb_decompiler.cpp - renderer_opengl/gl_arb_decompiler.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_device.cpp @@ -85,10 +80,6 @@ add_library(video_core STATIC renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h - renderer_opengl/gl_shader_decompiler.cpp - renderer_opengl/gl_shader_decompiler.h - renderer_opengl/gl_shader_disk_cache.cpp - renderer_opengl/gl_shader_disk_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp @@ -128,8 +119,6 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h - renderer_vulkan/vk_graphics_pipeline.cpp - renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp @@ -142,8 +131,6 @@ add_library(video_core STATIC renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - 
renderer_vulkan/vk_shader_decompiler.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_staging_buffer_pool.cpp @@ -159,57 +146,6 @@ add_library(video_core STATIC shader_cache.h shader_notify.cpp shader_notify.h - shader/decode/arithmetic.cpp - shader/decode/arithmetic_immediate.cpp - shader/decode/bfe.cpp - shader/decode/bfi.cpp - shader/decode/shift.cpp - shader/decode/arithmetic_integer.cpp - shader/decode/arithmetic_integer_immediate.cpp - shader/decode/arithmetic_half.cpp - shader/decode/arithmetic_half_immediate.cpp - shader/decode/ffma.cpp - shader/decode/hfma2.cpp - shader/decode/conversion.cpp - shader/decode/memory.cpp - shader/decode/texture.cpp - shader/decode/image.cpp - shader/decode/float_set_predicate.cpp - shader/decode/integer_set_predicate.cpp - shader/decode/half_set_predicate.cpp - shader/decode/predicate_set_register.cpp - shader/decode/predicate_set_predicate.cpp - shader/decode/register_set_predicate.cpp - shader/decode/float_set.cpp - shader/decode/integer_set.cpp - shader/decode/half_set.cpp - shader/decode/video.cpp - shader/decode/warp.cpp - shader/decode/xmad.cpp - shader/decode/other.cpp - shader/ast.cpp - shader/ast.h - shader/async_shaders.cpp - shader/async_shaders.h - shader/compiler_settings.cpp - shader/compiler_settings.h - shader/control_flow.cpp - shader/control_flow.h - shader/decode.cpp - shader/expr.cpp - shader/expr.h - shader/memory_util.cpp - shader/memory_util.h - shader/node_helper.cpp - shader/node_helper.h - shader/node.h - shader/registry.cpp - shader/registry.h - shader/shader_ir.cpp - shader/shader_ir.h - shader/track.cpp - shader/transform_feedback.cpp - shader/transform_feedback.h surface.cpp surface.h texture_cache/accelerated_swizzle.cpp diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h 
+++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" -#include "video_core/textures/texture.h" - -namespace Tegra::Engines { - -struct SamplerDescriptor { - union { - u32 raw = 0; - BitField<0, 2, Tegra::Shader::TextureType> texture_type; - BitField<2, 3, Tegra::Texture::ComponentType> r_type; - BitField<5, 1, u32> is_array; - BitField<6, 1, u32> is_buffer; - BitField<7, 1, u32> is_shadow; - BitField<8, 3, Tegra::Texture::ComponentType> g_type; - BitField<11, 3, Tegra::Texture::ComponentType> b_type; - BitField<14, 3, Tegra::Texture::ComponentType> a_type; - BitField<17, 7, Tegra::Texture::TextureFormat> format; - }; - - bool operator==(const SamplerDescriptor& rhs) const noexcept { - return raw == rhs.raw; - } - - bool operator!=(const SamplerDescriptor& rhs) const noexcept { - return !operator==(rhs); - } - - static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { - using Tegra::Shader::TextureType; - SamplerDescriptor result; - - result.format.Assign(tic.format.Value()); - result.r_type.Assign(tic.r_type.Value()); - result.g_type.Assign(tic.g_type.Value()); - result.b_type.Assign(tic.b_type.Value()); - result.a_type.Assign(tic.a_type.Value()); - - switch (tic.texture_type.Value()) { - case Tegra::Texture::TextureType::Texture1D: - result.texture_type.Assign(TextureType::Texture1D); - return result; - case Tegra::Texture::TextureType::Texture2D: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::Texture3D: - result.texture_type.Assign(TextureType::Texture3D); - return result; - case Tegra::Texture::TextureType::TextureCubemap: - 
result.texture_type.Assign(TextureType::TextureCube); - return result; - case Tegra::Texture::TextureType::Texture1DArray: - result.texture_type.Assign(TextureType::Texture1D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DArray: - result.texture_type.Assign(TextureType::Texture2D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture1DBuffer: - result.texture_type.Assign(TextureType::Texture1D); - result.is_buffer.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DNoMipmap: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::TextureCubeArray: - result.texture_type.Assign(TextureType::TextureCube); - result.is_array.Assign(1); - return result; - default: - result.texture_type.Assign(TextureType::Texture2D); - return result; - } - } -}; -static_assert(std::is_trivially_copyable_v); - -class ConstBufferEngineInterface { -public: - virtual ~ConstBufferEngineInterface() = default; - virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; - virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; - virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const = 0; - virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; - virtual u32 GetBoundBuffer() const = 0; - - virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; - virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; -}; - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a9b75091e..cae93c470 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -u32 
KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& buffer = launch_description.const_buffer_config[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); - return result; -} - -SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); - - const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; - LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - - 
rasterizer->DispatchCompute(code_addr); + rasterizer->DispatchCompute(); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..0d7683c2d 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,7 +10,6 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/engines/shader_type.h" @@ -40,7 +39,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { +class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); @@ -209,23 +208,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - private: void ProcessLaunch(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..103a51fd0 100644 --- 
a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } -u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader_stage = state.shader_stages[static_cast(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - return memory_manager.Read(buffer.address + offset); -} - -SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader = state.shader_stages[static_cast(stage)]; - const auto& tex_info_buffer = shader.const_buffers[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.address + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..cbf94412b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ 
-17,7 +17,6 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" @@ -49,7 +48,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { +class Maxwell3D final : public EngineInterface { public: explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); ~Maxwell3D(); @@ -1424,23 +1423,6 @@ public: void FlushMMEInlineDraw(); - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - bool ShouldExecute() const { return execute_on; } diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp deleted file mode 100644 index f058f2744..000000000 --- a/src/video_core/guest_driver.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/guest_driver.h" - -namespace VideoCore { - -void GuestDriverProfile::DeduceTextureHandlerSize(std::vector bound_offsets) { - if (texture_handler_size) { - return; - } - const std::size_t size = bound_offsets.size(); - if (size < 2) { - return; - } - std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); - u32 min_val = std::numeric_limits::max(); - for (std::size_t i = 1; i < size; ++i) { - if (bound_offsets[i] == bound_offsets[i - 1]) { - continue; - } - const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; - min_val = std::min(min_val, new_min); - } - if (min_val > 2) { - return; - } - texture_handler_size = min_texture_handler_size * min_val; -} - -} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h deleted file mode 100644 index 21e569ba1..000000000 --- a/src/video_core/guest_driver.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" - -namespace VideoCore { - -/** - * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect - * information necessary for impossible to avoid HLE methods like shader tracks as they are - * Entscheidungsproblems. - */ -class GuestDriverProfile { -public: - explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional texture_handler_size_) - : texture_handler_size{texture_handler_size_} {} - - void DeduceTextureHandlerSize(std::vector bound_offsets); - - u32 GetTextureHandlerSize() const { - return texture_handler_size.value_or(default_texture_handler_size); - } - - bool IsTextureHandlerSizeKnown() const { - return texture_handler_size.has_value(); - } - -private: - // Minimum size of texture handler any driver can use. 
- static constexpr u32 min_texture_handler_size = 4; - - // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. - // Thus, certain drivers may squish the size. - static constexpr u32 default_texture_handler_size = 8; - - std::optional texture_handler_size = default_texture_handler_size; -}; - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 58014c1c3..b094fc064 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" -#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -45,7 +44,7 @@ public: virtual void Clear() = 0; /// Dispatches a compute shader invocation - virtual void DispatchCompute(GPUVAddr code_addr) = 0; + virtual void DispatchCompute() = 0; /// Resets the counter of a query virtual void ResetCounter(QueryType type) = 0; @@ -136,18 +135,5 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const DiskResourceLoadCallback& callback) {} - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() { - return guest_driver_profile; - } - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. 
- [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const { - return guest_driver_profile; - } - -private: - GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null @@ -1,2124 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -// Predicates in the decompiled code follow the convention that -1 means true and 0 means false. -// GLASM lacks booleans, so they have to be implemented as integers. -// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to -// select between two values, because -1 will be evaluated as true and 0 as false. 
- -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; -using Operation = const OperationNode&; - -constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; - -char Swizzle(std::size_t component) { - static constexpr std::string_view SWIZZLE{"xyzw"}; - return SWIZZLE.at(component); -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -std::string_view Modifiers(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - if (meta && meta->precise) { - return ".PREC"; - } - return ""; -} - -std::string_view GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return ""; - case PixelImap::Constant: - return "FLAT "; - case PixelImap::ScreenLinear: - return "NOPERSPECTIVE "; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; -} - -std::string_view ImageType(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "BUFFER"; - case Tegra::Shader::ImageType::Texture1DArray: - return "ARRAY1D"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "ARRAY2D"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - } - UNREACHABLE(); - return {}; -} - -std::string_view StackName(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "SSY"; - case MetaStackClass::Pbk: - return "PBK"; - 
} - UNREACHABLE(); - return ""; -}; - -std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - return "POINTS"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return "LINES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - return "LINES_ADJACENCY"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - return "TRIANGLES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return "TRIANGLES_ADJACENCY"; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return "POINTS"; - } -} - -std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "POINTS"; - case Tegra::Shader::OutputTopology::LineStrip: - return "LINE_STRIP"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "TRIANGLE_STRIP"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -std::string_view StageInputName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - case ShaderType::Geometry: - return "vertex"; - case ShaderType::Fragment: - return "fragment"; - case ShaderType::Compute: - return "invocation"; - default: - UNREACHABLE(); - return ""; - } -} - -std::string TextureType(const MetaTexture& meta) { - if (meta.sampler.is_buffer) { - return "BUFFER"; - } - std::string type; - if (meta.sampler.is_shadow) { - type += "SHADOW"; - } - 
if (meta.sampler.is_array) { - type += "ARRAY"; - } - type += [&meta] { - switch (meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return "1D"; - case Tegra::Shader::TextureType::Texture2D: - return "2D"; - case Tegra::Shader::TextureType::Texture3D: - return "3D"; - case Tegra::Shader::TextureType::TextureCube: - return "CUBE"; - } - UNREACHABLE(); - return "2D"; - }(); - return type; -} - -class ARBDecompiler final { -public: - explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier); - - std::string Code() const { - return shader_source; - } - -private: - void DefineGlobalMemory(); - - void DeclareHeader(); - void DeclareVertex(); - void DeclareGeometry(); - void DeclareFragment(); - void DeclareCompute(); - void DeclareInputAttributes(); - void DeclareOutputAttributes(); - void DeclareLocalMemory(); - void DeclareGlobalMemory(); - void DeclareConstantBuffers(); - void DeclareRegisters(); - void DeclareTemporaries(); - void DeclarePredicates(); - void DeclareInternalFlags(); - - void InitializeVariables(); - - void DecompileAST(); - void DecompileBranchMode(); - - void VisitAST(const ASTNode& node); - std::string VisitExpression(const Expr& node); - - void VisitBlock(const NodeBlock& bb); - - std::string Visit(const Node& node); - - std::tuple BuildCoords(Operation); - std::string BuildAoffi(Operation); - std::string GlobalMemoryPointer(const GmemNode& gmem); - void Exit(); - - std::string Assign(Operation); - std::string Select(Operation); - std::string FClamp(Operation); - std::string FCastHalf0(Operation); - std::string FCastHalf1(Operation); - std::string FSqrt(Operation); - std::string FSwizzleAdd(Operation); - std::string HAdd2(Operation); - std::string HMul2(Operation); - std::string HFma2(Operation); - std::string HAbsolute(Operation); - std::string HNegate(Operation); - std::string HClamp(Operation); - std::string HCastFloat(Operation); - 
std::string HUnpack(Operation); - std::string HMergeF32(Operation); - std::string HMergeH0(Operation); - std::string HMergeH1(Operation); - std::string HPack2(Operation); - std::string LogicalAssign(Operation); - std::string LogicalPick2(Operation); - std::string LogicalAnd2(Operation); - std::string FloatOrdered(Operation); - std::string FloatUnordered(Operation); - std::string LogicalAddCarry(Operation); - std::string Texture(Operation); - std::string TextureGather(Operation); - std::string TextureQueryDimensions(Operation); - std::string TextureQueryLod(Operation); - std::string TexelFetch(Operation); - std::string TextureGradient(Operation); - std::string ImageLoad(Operation); - std::string ImageStore(Operation); - std::string Branch(Operation); - std::string BranchIndirect(Operation); - std::string PushFlowStack(Operation); - std::string PopFlowStack(Operation); - std::string Exit(Operation); - std::string Discard(Operation); - std::string EmitVertex(Operation); - std::string EndPrimitive(Operation); - std::string InvocationId(Operation); - std::string YNegate(Operation); - std::string ThreadId(Operation); - std::string ShuffleIndexed(Operation); - std::string Barrier(Operation); - std::string MemoryBarrierGroup(Operation); - std::string MemoryBarrierGlobal(Operation); - - template - std::string Unary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); - return temporary; - } - - template - std::string Binary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1])); - return temporary; - } - - template - std::string Trinary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1]), Visit(operation[2])); - return temporary; 
- } - - template - std::string FloatComparison(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("TRUNC.U.CC RC.x, {};", Binary(operation)); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NE.x), -1;", temporary); - - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - if constexpr (unordered) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - } else if (op == SNE_F) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - } - return temporary; - } - - template - std::string HalfComparison(Operation operation) { - std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - AddLine("UP2H.F {}, {};", tmp1, op_a); - AddLine("UP2H.F {}, {};", tmp2, op_b); - AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); - AddLine("TRUNC.U.CC RC.xy, {};", tmp1); - AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); - AddLine("MOV.S {}.x (NE.x), -1;", tmp1); - AddLine("MOV.S {}.y (NE.y), -1;", tmp1); - if constexpr (is_nan) { - AddLine("MOVC.F RC.x, {};", op_a); - AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); - AddLine("MOVC.F RC.x, {};", op_b); - AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); - } - return tmp1; - } - - template - std::string AtomicImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const 
std::size_t num_values = meta.values.size(); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - - AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, - image_id, ImageType(meta.image.type)); - return fmt::format("{}.x", coord); - } - - template - std::string Atomic(Operation operation) { - std::string temporary = AllocTemporary(); - std::string address; - std::string_view opname; - bool robust = false; - if (const auto gmem = std::get_if(&*operation[0])) { - address = GlobalMemoryPointer(*gmem); - opname = "ATOM"; - robust = true; - } else if (const auto smem = std::get_if(&*operation[0])) { - address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); - opname = "ATOMS"; - } else { - UNREACHABLE(); - return "{0, 0, 0, 0}"; - } - if (robust) { - AddLine("IF NE.x;"); - } - AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); - if (robust) { - AddLine("ELSE;"); - AddLine("MOV.S {}, 0;", temporary); - AddLine("ENDIF;"); - } - return temporary; - } - - template - std::string Negate(Operation operation) { - std::string temporary = AllocTemporary(); - if constexpr (type == 'F') { - AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); - } else { - AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); - } - return temporary; - } - - template - std::string Absolute(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); - return temporary; - } - - template - std::string BitfieldInsert(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, 
temporary, Visit(operation[3])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); - AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), - Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string BitfieldExtract(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); - AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string LocalInvocationId(Operation) { - return fmt::format("invocation.localid.{}", swizzle); - } - - template - std::string WorkGroupId(Operation) { - return fmt::format("invocation.groupid.{}", swizzle); - } - - template - std::string ThreadMask(Operation) { - return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); - } - - template - void AddExpression(std::string_view text, Args&&... args) { - shader_source += fmt::format(fmt::runtime(text), std::forward(args)...); - } - - template - void AddLine(std::string_view text, Args&&... 
args) { - AddExpression(text, std::forward(args)...); - shader_source += '\n'; - } - - std::string AllocLongVectorTemporary() { - max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); - return fmt::format("L{}", num_long_temporaries++); - } - - std::string AllocLongTemporary() { - return fmt::format("{}.x", AllocLongVectorTemporary()); - } - - std::string AllocVectorTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}", num_temporaries++); - } - - std::string AllocTemporary() { - return fmt::format("{}.x", AllocVectorTemporary()); - } - - void ResetTemporaries() noexcept { - num_temporaries = 0; - num_long_temporaries = 0; - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - - std::size_t num_temporaries = 0; - std::size_t max_temporaries = 0; - - std::size_t num_long_temporaries = 0; - std::size_t max_long_temporaries = 0; - - std::map global_memory_names; - - std::string shader_source; - - static constexpr std::string_view ADD_F32 = "ADD.F32"; - static constexpr std::string_view ADD_S = "ADD.S"; - static constexpr std::string_view ADD_U = "ADD.U"; - static constexpr std::string_view MUL_F32 = "MUL.F32"; - static constexpr std::string_view MUL_S = "MUL.S"; - static constexpr std::string_view MUL_U = "MUL.U"; - static constexpr std::string_view DIV_F32 = "DIV.F32"; - static constexpr std::string_view DIV_S = "DIV.S"; - static constexpr std::string_view DIV_U = "DIV.U"; - static constexpr std::string_view MAD_F32 = "MAD.F32"; - static constexpr std::string_view RSQ_F32 = "RSQ.F32"; - static constexpr std::string_view COS_F32 = "COS.F32"; - static constexpr std::string_view SIN_F32 = "SIN.F32"; - static constexpr std::string_view EX2_F32 = "EX2.F32"; - static constexpr std::string_view LG2_F32 = "LG2.F32"; - static constexpr std::string_view SLT_F = "SLT.F32"; - static constexpr std::string_view SLT_S = "SLT.S"; - static constexpr 
std::string_view SLT_U = "SLT.U"; - static constexpr std::string_view SEQ_F = "SEQ.F32"; - static constexpr std::string_view SEQ_S = "SEQ.S"; - static constexpr std::string_view SEQ_U = "SEQ.U"; - static constexpr std::string_view SLE_F = "SLE.F32"; - static constexpr std::string_view SLE_S = "SLE.S"; - static constexpr std::string_view SLE_U = "SLE.U"; - static constexpr std::string_view SGT_F = "SGT.F32"; - static constexpr std::string_view SGT_S = "SGT.S"; - static constexpr std::string_view SGT_U = "SGT.U"; - static constexpr std::string_view SNE_F = "SNE.F32"; - static constexpr std::string_view SNE_S = "SNE.S"; - static constexpr std::string_view SNE_U = "SNE.U"; - static constexpr std::string_view SGE_F = "SGE.F32"; - static constexpr std::string_view SGE_S = "SGE.S"; - static constexpr std::string_view SGE_U = "SGE.U"; - static constexpr std::string_view AND_S = "AND.S"; - static constexpr std::string_view AND_U = "AND.U"; - static constexpr std::string_view TRUNC_F = "TRUNC.F"; - static constexpr std::string_view TRUNC_S = "TRUNC.S"; - static constexpr std::string_view TRUNC_U = "TRUNC.U"; - static constexpr std::string_view SHL_S = "SHL.S"; - static constexpr std::string_view SHL_U = "SHL.U"; - static constexpr std::string_view SHR_S = "SHR.S"; - static constexpr std::string_view SHR_U = "SHR.U"; - static constexpr std::string_view OR_S = "OR.S"; - static constexpr std::string_view OR_U = "OR.U"; - static constexpr std::string_view XOR_S = "XOR.S"; - static constexpr std::string_view XOR_U = "XOR.U"; - static constexpr std::string_view NOT_S = "NOT.S"; - static constexpr std::string_view NOT_U = "NOT.U"; - static constexpr std::string_view BTC_S = "BTC.S"; - static constexpr std::string_view BTC_U = "BTC.U"; - static constexpr std::string_view BTFM_S = "BTFM.S"; - static constexpr std::string_view BTFM_U = "BTFM.U"; - static constexpr std::string_view ROUND_F = "ROUND.F"; - static constexpr std::string_view CEIL_F = "CEIL.F"; - static constexpr 
std::string_view FLR_F = "FLR.F"; - static constexpr std::string_view I2F_S = "I2F.S"; - static constexpr std::string_view I2F_U = "I2F.U"; - static constexpr std::string_view MIN_F = "MIN.F"; - static constexpr std::string_view MIN_S = "MIN.S"; - static constexpr std::string_view MIN_U = "MIN.U"; - static constexpr std::string_view MAX_F = "MAX.F"; - static constexpr std::string_view MAX_S = "MAX.S"; - static constexpr std::string_view MAX_U = "MAX.U"; - static constexpr std::string_view MOV_U = "MOV.U"; - static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; - static constexpr std::string_view TGALL_U = "TGALL.U"; - static constexpr std::string_view TGANY_U = "TGANY.U"; - static constexpr std::string_view TGEQ_U = "TGEQ.U"; - static constexpr std::string_view EXCH = "EXCH"; - static constexpr std::string_view ADD = "ADD"; - static constexpr std::string_view MIN = "MIN"; - static constexpr std::string_view MAX = "MAX"; - static constexpr std::string_view AND = "AND"; - static constexpr std::string_view OR = "OR"; - static constexpr std::string_view XOR = "XOR"; - static constexpr std::string_view U32 = "U32"; - static constexpr std::string_view S32 = "S32"; - - static constexpr std::size_t NUM_ENTRIES = static_cast(OperationCode::Amount); - using DecompilerType = std::string (ARBDecompiler::*)(Operation); - static constexpr std::array OPERATION_DECOMPILERS = { - &ARBDecompiler::Assign, - - &ARBDecompiler::Select, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Trinary, - &ARBDecompiler::Negate<'F'>, - &ARBDecompiler::Absolute<'F'>, - &ARBDecompiler::FClamp, - &ARBDecompiler::FCastHalf0, - &ARBDecompiler::FCastHalf1, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSqrt, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - 
&ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSwizzleAdd, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Negate<'S'>, - &ARBDecompiler::Absolute<'S'>, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'S'>, - &ARBDecompiler::BitfieldExtract<'S'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'U'>, - &ARBDecompiler::BitfieldExtract<'U'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::HAdd2, - &ARBDecompiler::HMul2, - &ARBDecompiler::HFma2, - &ARBDecompiler::HAbsolute, - &ARBDecompiler::HNegate, - &ARBDecompiler::HClamp, - &ARBDecompiler::HCastFloat, - &ARBDecompiler::HUnpack, - &ARBDecompiler::HMergeF32, - &ARBDecompiler::HMergeH0, - &ARBDecompiler::HMergeH1, - &ARBDecompiler::HPack2, - - &ARBDecompiler::LogicalAssign, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::LogicalPick2, - &ARBDecompiler::LogicalAnd2, - - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatOrdered, - &ARBDecompiler::FloatUnordered, - &ARBDecompiler::FloatComparison, - 
&ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::LogicalAddCarry, - - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - - &ARBDecompiler::Texture, - &ARBDecompiler::Texture, - &ARBDecompiler::TextureGather, - &ARBDecompiler::TextureQueryDimensions, - &ARBDecompiler::TextureQueryLod, - &ARBDecompiler::TexelFetch, - &ARBDecompiler::TextureGradient, - - &ARBDecompiler::ImageLoad, - &ARBDecompiler::ImageStore, - - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - 
&ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Branch, - &ARBDecompiler::BranchIndirect, - &ARBDecompiler::PushFlowStack, - &ARBDecompiler::PopFlowStack, - &ARBDecompiler::Exit, - &ARBDecompiler::Discard, - - &ARBDecompiler::EmitVertex, - &ARBDecompiler::EndPrimitive, - - &ARBDecompiler::InvocationId, - &ARBDecompiler::YNegate, - &ARBDecompiler::LocalInvocationId<'x'>, - &ARBDecompiler::LocalInvocationId<'y'>, - &ARBDecompiler::LocalInvocationId<'z'>, - &ARBDecompiler::WorkGroupId<'x'>, - &ARBDecompiler::WorkGroupId<'y'>, - &ARBDecompiler::WorkGroupId<'z'>, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::ThreadId, - &ARBDecompiler::ThreadMask<'e', 'q'>, - &ARBDecompiler::ThreadMask<'g', 'e'>, - &ARBDecompiler::ThreadMask<'g', 't'>, - &ARBDecompiler::ThreadMask<'l', 'e'>, - &ARBDecompiler::ThreadMask<'l', 't'>, - &ARBDecompiler::ShuffleIndexed, - - &ARBDecompiler::Barrier, - &ARBDecompiler::MemoryBarrierGroup, - &ARBDecompiler::MemoryBarrierGlobal, - }; -}; - -ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { - DefineGlobalMemory(); - - AddLine("TEMP RC;"); - AddLine("TEMP FSWZA[4];"); - AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - AddLine("END"); - - const std::string code = std::move(shader_source); - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareLocalMemory(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareRegisters(); - DeclareTemporaries(); - DeclarePredicates(); - DeclareInternalFlags(); - - shader_source += code; -} - -std::string_view HeaderStageName(ShaderType stage) { - 
switch (stage) { - case ShaderType::Vertex: - return "vp"; - case ShaderType::Geometry: - return "gp"; - case ShaderType::Fragment: - return "fp"; - case ShaderType::Compute: - return "cp"; - default: - UNREACHABLE(); - return ""; - } -} - -void ARBDecompiler::DefineGlobalMemory() { - u32 binding = 0; - for (const auto& pair : ir.GetGlobalMemory()) { - const GlobalMemoryBase base = pair.first; - global_memory_names.emplace(base, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareHeader() { - AddLine("!!NV{}5.0", HeaderStageName(stage)); - // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D - AddLine("OPTION NV_internal;"); - AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_thread_group;"); - if (ir.UsesWarps() && device.HasWarpIntrinsics()) { - AddLine("OPTION NV_shader_thread_shuffle;"); - } - if (stage == ShaderType::Vertex) { - if (device.HasNvViewportArray2()) { - AddLine("OPTION NV_viewport_array2;"); - } - } - if (stage == ShaderType::Fragment) { - AddLine("OPTION ARB_draw_buffers;"); - } - if (device.HasImageLoadFormatted()) { - AddLine("OPTION EXT_shader_image_load_formatted;"); - } -} - -void ARBDecompiler::DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); -} - -void ARBDecompiler::DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const auto& header = ir.GetHeader(); - AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); - AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); - AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); - AddLine("ATTRIB vertex_position = vertex.position;"); -} - -void ARBDecompiler::DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - AddLine("OUTPUT result_color7 = result.color[7];"); - AddLine("OUTPUT result_color6 = 
result.color[6];"); - AddLine("OUTPUT result_color5 = result.color[5];"); - AddLine("OUTPUT result_color4 = result.color[4];"); - AddLine("OUTPUT result_color3 = result.color[3];"); - AddLine("OUTPUT result_color2 = result.color[2];"); - AddLine("OUTPUT result_color1 = result.color[1];"); - AddLine("OUTPUT result_color0 = result.color;"); -} - -void ARBDecompiler::DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const ComputeInfo& info = registry.GetComputeInfo(); - AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], - info.workgroup_size[2]); - if (info.shared_memory_size_in_words == 0) { - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - u32 size_in_bytes = info.shared_memory_size_in_words * 4; - if (size_in_bytes > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size_in_bytes, limit); - size_in_bytes = limit; - } - - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); -} - -void ARBDecompiler::DeclareInputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - const std::string_view stage_name = StageInputName(stage); - for (const auto attribute : ir.GetInputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - - std::string_view suffix; - if (stage == ShaderType::Fragment) { - const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix = GetInputFlags(input_mode); - } - AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, - index); - } -} - -void ARBDecompiler::DeclareOutputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = 
GetGenericAttributeIndex(attribute); - AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); - } -} - -void ARBDecompiler::DeclareLocalMemory() { - u64 size = 0; - if (stage == ShaderType::Compute) { - size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - size = ir.GetHeader().GetLocalMemorySize(); - } - if (size == 0) { - return; - } - const u64 element_count = Common::AlignUp(size, 4) / 4; - AddLine("TEMP lmem[{}];", element_count); -} - -void ARBDecompiler::DeclareGlobalMemory() { - const size_t num_entries = ir.GetGlobalMemory().size(); - if (num_entries > 0) { - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); - } -} - -void ARBDecompiler::DeclareConstantBuffers() { - u32 binding = 0; - for (const auto& cbuf : ir.GetConstantBuffers()) { - AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - AddLine("TEMP R{};", gpr); - } -} - -void ARBDecompiler::DeclareTemporaries() { - for (std::size_t i = 0; i < max_temporaries; ++i) { - AddLine("TEMP T{};", i); - } - for (std::size_t i = 0; i < max_long_temporaries; ++i) { - AddLine("LONG TEMP L{};", i); - } -} - -void ARBDecompiler::DeclarePredicates() { - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("TEMP P{};", static_cast(pred)); - } -} - -void ARBDecompiler::DeclareInternalFlags() { - for (const char* name : INTERNAL_FLAG_NAMES) { - AddLine("TEMP {};", name); - } -} - -void ARBDecompiler::InitializeVariables() { - AddLine("MOV.F32 FSWZA[0], -1;"); - AddLine("MOV.F32 FSWZA[1], 1;"); - AddLine("MOV.F32 FSWZA[2], -1;"); - AddLine("MOV.F32 FSWZA[3], 0;"); - AddLine("MOV.F32 FSWZB[0], -1;"); - AddLine("MOV.F32 FSWZB[1], -1;"); - AddLine("MOV.F32 FSWZB[2], 1;"); - AddLine("MOV.F32 FSWZB[3], -1;"); - - if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { - 
AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); - } - for (const u32 gpr : ir.GetRegisters()) { - AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); - } - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast(pred)); - } -} - -void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i); - } - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); - } - - InitializeVariables(); - - VisitAST(ir.GetASTProgram()); -} - -void ARBDecompiler::DecompileBranchMode() { - static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); - AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); - AddLine("TEMP SSY_TOP;"); - AddLine("TEMP PBK_TOP;"); - } - - AddLine("TEMP PC;"); - - if (!ir.IsFlowStackDisabled()) { - AddLine("MOV.U SSY_TOP.x, 0;"); - AddLine("MOV.U PBK_TOP.x, 0;"); - } - - InitializeVariables(); - - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); - const u32 first_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", first_address); - - AddLine("REP;"); - - std::size_t num_blocks = 0; - while (basic_block_it != basic_block_end) { - const auto& [address, bb] = *basic_block_it; - ++num_blocks; - - AddLine("SEQ.S.CC RC.x, PC.x, {};", address); - AddLine("IF NE.x;"); - - VisitBlock(bb); - - ++basic_block_it; - - if (basic_block_it != basic_block_end) { - const auto op = std::get_if(&*bb[bb.size() - 1]); - if (!op || op->GetCode() != OperationCode::Branch) { - const u32 next_address = basic_block_it->first; - AddLine("MOV.U 
PC.x, {};", next_address); - AddLine("CONT;"); - } - } - - AddLine("ELSE;"); - } - AddLine("RET;"); - while (num_blocks--) { - AddLine("ENDIF;"); - } - - AddLine("ENDREP;"); -} - -void ARBDecompiler::VisitAST(const ASTNode& node) { - if (const auto ast = std::get_if(&*node->GetInnerData())) { - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto if_then = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(if_then->condition); - ResetTemporaries(); - - AddLine("MOVC.U RC.x, {};", condition); - AddLine("IF NE.x;"); - for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("ENDIF;"); - } else if (const auto if_else = std::get_if(&*node->GetInnerData())) { - AddLine("ELSE;"); - for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto decoded = std::get_if(&*node->GetInnerData())) { - VisitBlock(decoded->nodes); - } else if (const auto var_set = std::get_if(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); - ResetTemporaries(); - } else if (const auto do_while = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(do_while->condition); - ResetTemporaries(); - AddLine("REP;"); - for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("MOVC.U RC.x, {};", condition); - AddLine("BRK (NE.x);"); - AddLine("ENDREP;"); - } else if (const auto ast_return = std::get_if(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast_return->condition); - if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); - AddLine("IF NE.x;"); - ResetTemporaries(); - } - if (ast_return->kills) { - AddLine("KIL TR;"); - } else { - 
Exit(); - } - if (!is_true) { - AddLine("ENDIF;"); - } - } else if (const auto ast_break = std::get_if(&*node->GetInnerData())) { - if (ExprIsTrue(ast_break->condition)) { - AddLine("BRK;"); - } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); - AddLine("BRK (NE.x);"); - ResetTemporaries(); - } - } else if (std::holds_alternative(*node->GetInnerData())) { - // Nothing to do - } else { - UNREACHABLE(); - } -} - -std::string ARBDecompiler::VisitExpression(const Expr& node) { - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("P{}.x", static_cast(expr->predicate)); - } - if (const auto expr = std::get_if(&*node)) { - return Visit(ir.GetConditionCode(expr->cc)); - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("F{}.x", expr->var_index); - } - if (const auto expr = std::get_if(&*node)) { - return expr->value ? 
"0xffffffff" : "0"; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); - return result; - } - UNREACHABLE(); - return "0"; -} - -void ARBDecompiler::VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } -} - -std::string ARBDecompiler::Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - const std::size_t index = static_cast(operation->GetCode()); - if (index >= OPERATION_DECOMPILERS.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", index); - return {}; - } - const auto decompiler = OPERATION_DECOMPILERS[index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return "{0, 0, 0, 0}.x"; - } - return fmt::format("R{}.x", index); - } - - if (const auto cv = std::get_if(&*node)) { - return fmt::format("CV{}.x", cv->GetIndex()); - } - - if (const auto immediate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); - return temporary; - } - - if (const auto predicate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - AddLine("MOV.S {}, -1;", temporary); - break; - case Tegra::Shader::Pred::NeverExecute: - AddLine("MOV.S {}, 0;", temporary); - break; - default: - AddLine("MOV.S {}, P{}.x;", temporary, static_cast(index)); - break; - } - if (predicate->IsNegated()) { - AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); - } - return temporary; - } - - if (const auto abuf = 
std::get_if(&*node)) { - if (abuf->IsPhysicalBuffer()) { - UNIMPLEMENTED_MSG("Physical buffers are not implemented"); - return "{0, 0, 0, 0}.x"; - } - - const Attribute::Index index = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (index) { - case Attribute::Index::Position: { - if (stage == ShaderType::Geometry) { - return fmt::format("{}_position[{}].{}", StageInputName(stage), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.position.{}", StageInputName(stage), swizzle); - } - } - case Attribute::Index::TessCoordInstanceIDVertexID: - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - return "vertex.instance"; - case 3: - return "vertex.id"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - break; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "fragment.pointcoord.x"; - case 1: - return "fragment.pointcoord.y"; - } - UNIMPLEMENTED(); - break; - case Attribute::Index::FrontFacing: { - ASSERT(stage == ShaderType::Fragment); - ASSERT(element == 3); - const std::string temporary = AllocVectorTemporary(); - AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); - AddLine("MOV.U.CC RC.x, -RC;"); - AddLine("MOV.S {}.x, 0;", temporary); - AddLine("MOV.S {}.x (NE.x), -1;", temporary); - return fmt::format("{}.x", temporary); - } - default: - if (IsGenericAttribute(index)) { - if (stage == ShaderType::Geometry) { - return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.attrib[{}].{}", StageInputName(stage), - GetGenericAttributeIndex(index), swizzle); - } - } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); - break; - } - return "{0, 0, 0, 0}.x"; - } - - if (const auto cbuf = std::get_if(&*node)) { - std::string offset_string; - const auto& offset = cbuf->GetOffset(); - if (const auto imm = 
std::get_if(&*offset)) { - offset_string = std::to_string(imm->GetValue()); - } else { - offset_string = Visit(offset); - } - std::string temporary = AllocTemporary(); - AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); - return temporary; - } - - if (const auto gmem = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV {}, 0;", temporary); - AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); - return temporary; - } - - if (const auto lmem = std::get_if(&*node)) { - std::string temporary = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", temporary, temporary); - AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); - return temporary; - } - - if (const auto smem = std::get_if(&*node)) { - std::string temporary = Visit(smem->GetAddress()); - AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); - return temporary; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); - AddLine("IF NE.x;"); - VisitBlock(conditional->GetCode()); - AddLine("ENDIF;"); - return {}; - } - - if ([[maybe_unused]] const auto cmt = std::get_if(&*node)) { - // Uncommenting this will generate invalid code. GLASM lacks comments. 
- // AddLine("// {}", cmt->GetText()); - return {}; - } - - UNIMPLEMENTED(); - return {}; -} - -std::tuple ARBDecompiler::BuildCoords(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.sampler.is_indexed); - - const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube; - const std::size_t count = operation.GetOperandsCount(); - std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); - ++i; - } - if (meta.sampler.is_shadow) { - std::string compare = Visit(meta.depth_compare); - if (is_extended) { - ASSERT(i == 4); - std::string extra_coord = AllocVectorTemporary(); - AddLine("MOV.F {}.x, {};", extra_coord, compare); - return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; - } - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); - ++i; - } - return {temporary, temporary, i}; -} - -std::string ARBDecompiler::BuildAoffi(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - if (meta.aoffi.empty()) { - return {}; - } - const std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (auto& node : meta.aoffi) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); - } - return fmt::format(", offset({})", temporary); -} - -std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { - // Read a bindless SSBO, return its address and set CC accordingly - // address = c[binding].xy - // length = c[binding].z - const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - - const std::string pointer = AllocLongVectorTemporary(); - std::string temporary = AllocTemporary(); - - AddLine("PK64.U {}, c[{}];", pointer, binding); - 
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), - Visit(gmem.GetBaseAddress())); - AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); - // Compare offset to length and set CC - AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); - return fmt::format("{}.x", pointer); -} - -void ARBDecompiler::Exit() { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return; - } - - const auto safe_get_register = [this](u32 reg) -> std::string { - if (ir.GetRegisters().contains(reg)) { - return fmt::format("R{}.x", reg); - } - return "{0, 0, 0, 0}.x"; - }; - - const auto& header = ir.GetHeader(); - u32 current_reg = 0; - for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), - safe_get_register(current_reg)); - ++current_reg; - } - } - if (header.ps.omap.depth) { - AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); - } - - AddLine("RET;"); -} - -std::string ARBDecompiler::Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string dest_name; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op - return {}; - } - dest_name = fmt::format("R{}.x", gpr->GetIndex()); - } else if (const auto abuf = std::get_if(&*dest)) { - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (const Attribute::Index index = abuf->GetIndex()) { - case Attribute::Index::Position: - dest_name = fmt::format("result.position.{}", swizzle); - break; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return {}; - case 1: - case 2: 
- if (!device.HasNvViewportArray2()) { - LOG_ERROR( - Render_OpenGL, - "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); - return {}; - } - dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; - break; - case 3: - dest_name = "result.pointsize.x"; - break; - } - break; - case Attribute::Index::ClipDistances0123: - dest_name = fmt::format("result.clip[{}].x", element); - break; - case Attribute::Index::ClipDistances4567: - dest_name = fmt::format("result.clip[{}].x", element + 4); - break; - default: - if (!IsGenericAttribute(index)) { - UNREACHABLE(); - return {}; - } - dest_name = - fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); - break; - } - } else if (const auto lmem = std::get_if(&*dest)) { - const std::string address = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", address, address); - dest_name = fmt::format("lmem[{}].x", address); - } else if (const auto smem = std::get_if(&*dest)) { - AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); - ResetTemporaries(); - return {}; - } else if (const auto gmem = std::get_if(&*dest)) { - AddLine("IF NE.x;"); - AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); - AddLine("ENDIF;"); - ResetTemporaries(); - return {}; - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", dest_name, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::Select(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), - Visit(operation[2])); - return temporary; -} - -std::string ARBDecompiler::FClamp(Operation operation) { - // 1.0f in hex, replace with std::bit_cast on C++20 - static constexpr u32 POSITIVE_ONE = 0x3f800000; - - std::string temporary = AllocTemporary(); - const Node& value = operation[0]; - const Node& low = operation[1]; - const Node& high 
= operation[2]; - const auto* const imm_low = std::get_if(&*low); - const auto* const imm_high = std::get_if(&*high); - if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { - AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); - } else { - AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); - AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); - } - return temporary; -} - -std::string ARBDecompiler::FCastHalf0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FCastHalf1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); - AddLine("MOV {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FSqrt(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); - AddLine("RCP.F32 {}, {};", temporary, temporary); - return temporary; -} - -std::string ARBDecompiler::FSwizzleAdd(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. 
Kepler or better is required."); - AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); - return fmt::format("{}.x", temporary); - } - - AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); - AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); - AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); - AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); - AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); - AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); - AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HAdd2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HMul2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HFma2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string tmp3 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); - AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, 
tmp3); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HAbsolute(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HNegate(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("MOVC.S RC.x, {};", Visit(operation[1])); - AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); - AddLine("MOVC.S RC.x, {};", Visit(operation[2])); - AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HClamp(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HCastFloat(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); - AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HUnpack(Operation operation) { - std::string operand = Visit(operation[0]); - switch (std::get(operation.GetMeta())) { - case 
Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H0_H0: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H1_H1: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - } - UNREACHABLE(); - return "{0, 0, 0, 0}.x"; -} - -std::string ARBDecompiler::HMergeF32(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.x, {}.z;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.w;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return 
fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HPack2(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); - AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const Tegra::Shader::Pred index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = fmt::format("P{}.x", static_cast(index)); - } else if (const auto internal_flag = std::get_if(&*dest)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", target, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::LogicalPick2(Operation operation) { - std::string temporary = AllocTemporary(); - const u32 index = std::get(*operation[1]).GetValue(); - AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); - return temporary; -} - -std::string ARBDecompiler::LogicalAnd2(Operation operation) { - std::string temporary = AllocTemporary(); - const std::string op = Visit(operation[0]); - AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); - return temporary; -} - -std::string ARBDecompiler::FloatOrdered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S 
{}, -1;", temporary); - AddLine("MOV.S {} (NAN.x), 0;", temporary); - AddLine("MOV.S {} (NAN.y), 0;", temporary); - return temporary; -} - -std::string ARBDecompiler::FloatUnordered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NAN.x), -1;", temporary); - AddLine("MOV.S {} (NAN.y), -1;", temporary); - return temporary; -} - -std::string ARBDecompiler::LogicalAddCarry(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("IF CF.x;"); - AddLine("MOV.S {}, -1;", temporary); - AddLine("ENDIF;"); - return temporary; -} - -std::string ARBDecompiler::Texture(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string_view opcode = "TEX"; - std::string extra; - if (meta.bias) { - ASSERT(!meta.lod); - opcode = "TXB"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); - } else { - const std::string bias = AllocTemporary(); - AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); - extra = fmt::format(" {},", bias); - } - } - if (meta.lod) { - ASSERT(!meta.bias); - opcode = "TXL"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } else { - const std::string lod = AllocTemporary(); - AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); - extra = fmt::format(" {},", lod); - } - } - - AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", 
temporary); -} - -std::string ARBDecompiler::TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string comp; - if (!meta.sampler.is_shadow) { - const auto& immediate = std::get(*meta.component); - comp = fmt::format(".{}", Swizzle(immediate.GetValue())); - } - - AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::string lod = operation.GetOperandsCount() > 0 ? 
Visit(operation[0]) : "0"; - AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::size_t count = operation.GetOperandsCount(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); - AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); - AddLine("TRUNC.S {}, {};", temporary, temporary); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - if (!meta.sampler.is_buffer) { - ASSERT(swizzle < 4); - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), - BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const std::string ddx = AllocVectorTemporary(); - const std::string ddy = 
AllocVectorTemporary(); - const std::string coord = std::get<1>(BuildCoords(operation)); - - const std::size_t num_components = meta.derivates.size() / 2; - for (std::size_t index = 0; index < num_components; ++index) { - const char swizzle = Swizzle(index); - AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); - AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); - } - - const std::string_view result = coord; - AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); - return fmt::format("{}.x", result); -} - -std::string ARBDecompiler::ImageLoad(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t count = operation.GetOperandsCount(); - const std::string_view type = ImageType(meta.image.type); - - const std::string temporary = AllocVectorTemporary(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); - AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::ImageStore(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - const std::string_view type = ImageType(meta.image.type); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), 
Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); - return {}; -} - -std::string ARBDecompiler::Branch(Operation operation) { - const auto target = std::get(*operation[0]); - AddLine("MOV.U PC.x, {};", target.GetValue()); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::BranchIndirect(Operation operation) { - AddLine("MOV.U PC.x, {};", Visit(operation[0])); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const u32 target = std::get(*operation[0]).GetValue(); - const std::string_view stack_name = StackName(stack); - AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); - AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - return {}; -} - -std::string ARBDecompiler::PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const std::string_view stack_name = StackName(stack); - AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::Exit(Operation) { - Exit(); - return {}; -} - -std::string ARBDecompiler::Discard(Operation) { - AddLine("KIL TR;"); - return {}; -} - -std::string ARBDecompiler::EmitVertex(Operation) { - AddLine("EMIT;"); - return {}; -} - -std::string ARBDecompiler::EndPrimitive(Operation) { - AddLine("ENDPRIM;"); - return {}; -} - -std::string ARBDecompiler::InvocationId(Operation) { - return "primitive.invocation"; -} - -std::string ARBDecompiler::YNegate(Operation) { - LOG_WARNING(Render_OpenGL, "(STUBBED)"); - std::string temporary = AllocTemporary(); - AddLine("MOV.F {}, 1;", temporary); - return temporary; -} - -std::string 
ARBDecompiler::ThreadId(Operation) { - return fmt::format("{}.threadid", StageInputName(stage)); -} - -std::string ARBDecompiler::ShuffleIndexed(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - return Visit(operation[0]); - } - const std::string temporary = AllocVectorTemporary(); - AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), - Visit(operation[1])); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::Barrier(Operation) { - AddLine("BAR;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGroup(Operation) { - AddLine("MEMBAR.CTA;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { - AddLine("MEMBAR;"); - return {}; -} - -} // Anonymous namespace - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier) { - return ARBDecompiler(device, ir, registry, stage, identifier).Code(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "common/common_types.h" - -namespace Tegra::Engines { -enum class ShaderType : u32; -} - -namespace VideoCommon::Shader { -class ShaderIR; -class Registry; -} // namespace VideoCommon::Shader - -namespace OpenGL { - -class Device; - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..3551dbdcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -54,40 +54,6 @@ namespace { constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? 
image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - ShaderType shader_type, size_t index = 0) { - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -119,44 +85,6 @@ std::pair TransformFeedbackEnum(u8 location) { void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::Buffer; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window_) { - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } -} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { } } -void RasterizerOpenGL::SetupShaders(bool is_indexed) { - u32 clip_distances = 0; - - std::array shaders{}; - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeGraphicsDescriptors(); - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = maxwell3d.regs.shader_config[index]; - const auto program{static_cast(index)}; - - // Skip stages that are 
not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - switch (program) { - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(0); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(0); - break; - default: - break; - } - continue; - } - // Currently this stages are not supported in the OpenGL backend. - // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl || - program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; - switch (program) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - program_manager.UseVertexShader(program_handle); - break; - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(program_handle); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(program_handle); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - break; - } - - // Stage indices are 0 - 5 - const size_t stage = index == 0 ? 0 : index - 1; - shaders[stage] = shader; - - SetupDrawTextures(shader, stage); - SetupDrawImages(shader, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); - - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : shader->GetEntries().global_memory_entries) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - - // Workaround for Intel drivers. 
- // When a clip distance is enabled but not set in the shader it crops parts of the screen - // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the - // clip distances only when it's written by a shader stage. - clip_distances |= shader->GetEntries().clip_distances; - - // When VertexA is enabled, we have dual vertex shaders - if (program == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } - } - SyncClipEnabled(clip_distances); - maxwell3d.dirty.flags[Dirty::Shaders] = false; - - buffer_cache.UpdateGraphicsBuffers(is_indexed); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader* const shader = shaders[stage]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - const auto& base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } -} - void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(title_id, stop_loading, callback); -} + const VideoCore::DiskResourceLoadCallback& callback) {} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); @@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. 
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - SetupShaders(is_indexed); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); @@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { gpu.TickWork(); } -void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - BindComputeTextures(kernel); - - const auto& entries = kernel->GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_memory_entries) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); - - const auto& launch_desc = kepler_compute.launch_description; - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); - ++num_queued_commands; +void RasterizerOpenGL::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeTextures(kernel); - SetupComputeImages(kernel); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - program_manager.BindCompute(kernel->GetHandle()); - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - 
BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); -} - -void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, - GLuint base_image, size_t& image_view_index, - size_t& texture_index, size_t& image_index) { - const GLuint* const samplers = sampler_handles.data() + texture_index; - const GLuint* const textures = texture_handles.data() + texture_index; - const GLuint* const images = image_handles.data() + image_index; - - const size_t num_samplers = entries.samplers.size(); - for (const auto& sampler : entries.samplers) { - for (size_t i = 0; i < sampler.size; ++i) { - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); - texture_handles[texture_index++] = handle; - } - } - const size_t num_images = entries.images.size(); - for (size_t unit = 0; unit < num_images; ++unit) { - // TODO: Mark as modified - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); - image_handles[image_index] = handle; - ++image_index; - } - if (num_samplers > 0) { - glBindSamplers(base_texture, static_cast(num_samplers), samplers); - glBindTextures(base_texture, static_cast(num_samplers), textures); - } - if (num_images > 0) { - glBindImageTextures(base_image, static_cast(num_images), images); - } -} - -void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().samplers) { - const auto shader_type = static_cast(stage_index); - for (size_t index = 0; index < entry.size; ++index) { - const auto handle = - 
GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); - const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : kernel->GetEntries().samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); - const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().images) { - const auto shader_type = static_cast(stage_index); - const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : shader->GetEntries().images) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); - image_view_indices.push_back(handle.image); - } -} - void RasterizerOpenGL::SyncState() { SyncViewport(); SyncRasterizeEnable(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..1f58f8791 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,11 +28,9 @@ #include 
"video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core::Memory { @@ -81,7 +79,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -118,36 +116,11 @@ public: return num_queued_commands > 0; } - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - void BindComputeTextures(Shader* kernel); - - void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, - size_t& image_view_index, size_t& texture_index, size_t& image_index); - - /// Configures the current textures to use for the draw command. - void SetupDrawTextures(const Shader* shader, size_t stage_index); - - /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader* kernel); - - /// Configures images in a graphics shader. - void SetupDrawImages(const Shader* shader, size_t stage_index); - - /// Configures images in a compute shader. 
- void SetupComputeImages(const Shader* shader); - /// Syncs state to match guest's void SyncState(); @@ -230,8 +203,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - void SetupShaders(bool is_indexed); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -251,8 +222,6 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; - VideoCommon::Shader::AsyncShaders async_shaders; - boost::container::static_vector image_view_indices; std::array image_view_ids; boost::container::static_vector sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..4dd166156 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -20,307 +20,19 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" namespace OpenGL { -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::GetUniqueIdentifier; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::Registry; -using VideoCommon::Shader::ShaderIR; -using 
VideoCommon::Shader::STAGE_MAIN_OFFSET; - -namespace { - -constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; - -/// Gets the shader type from a Maxwell program type -constexpr GLenum GetGLShaderType(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_SHADER; - case ShaderType::Geometry: - return GL_GEOMETRY_SHADER; - case ShaderType::Fragment: - return GL_FRAGMENT_SHADER; - case ShaderType::Compute: - return GL_COMPUTE_SHADER; - default: - return GL_NONE; - } -} - -constexpr const char* GetShaderTypeName(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return "VS"; - case ShaderType::TesselationControl: - return "HS"; - case ShaderType::TesselationEval: - return "DS"; - case ShaderType::Geometry: - return "GS"; - case ShaderType::Fragment: - return "FS"; - case ShaderType::Compute: - return "CS"; - } - return "UNK"; -} - -constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { - switch (program_type) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - } - return {}; -} - -constexpr GLenum AssemblyEnum(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_PROGRAM_NV; - case ShaderType::TesselationControl: - return GL_TESS_CONTROL_PROGRAM_NV; - case ShaderType::TesselationEval: - return GL_TESS_EVALUATION_PROGRAM_NV; - case ShaderType::Geometry: - return GL_GEOMETRY_PROGRAM_NV; - case ShaderType::Fragment: - return GL_FRAGMENT_PROGRAM_NV; - case ShaderType::Compute: - return GL_COMPUTE_PROGRAM_NV; - } - return {}; -} - -std::string 
MakeShaderID(u64 unique_identifier, ShaderType shader_type) { - return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); -} - -std::shared_ptr MakeRegistry(const ShaderDiskCacheEntry& entry) { - const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, - entry.graphics_info, entry.compute_info}; - auto registry = std::make_shared(entry.type, info); - for (const auto& [address, value] : entry.keys) { - const auto [buffer, offset] = address; - registry->InsertKey(buffer, offset, value); - } - for (const auto& [offset, sampler] : entry.bound_samplers) { - registry->InsertBoundSampler(offset, sampler); - } - for (const auto& [key, sampler] : entry.bindless_samplers) { - const auto [buffer, offset] = key; - registry->InsertBindlessSampler(buffer, offset, sampler); - } - return registry; -} - -std::unordered_set GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast(format)); - } - return supported_formats; -} - -} // Anonymous namespace - -ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { - if (device.UseDriverCache()) { - // Ignore hint retrievable if we are using the driver cache - hint_retrievable = false; - } - const std::string shader_id = MakeShaderID(unique_identifier, shader_type); - LOG_INFO(Render_OpenGL, "{}", shader_id); - - auto program = std::make_shared(); - - if (device.UseAssemblyShaders()) { - const std::string arb = - DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); - - GLuint& arb_prog = 
program->assembly_program.handle; - -// Commented out functions signal OpenGL errors but are compatible with apitrace. -// Use them only to capture and replay on apitrace. -#if 0 - glGenProgramsNV(1, &arb_prog); - glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast(arb.size()), - reinterpret_cast(arb.data())); -#else - glGenProgramsARB(1, &arb_prog); - glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast(arb.size()), arb.data()); -#endif - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "\n{}", arb); - } - } else { - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); - - program->source_program.Create(true, hint_retrievable, shader.handle); - } - - return program; -} - -Shader::Shader(std::shared_ptr registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built{is_built_} { - handle = program->assembly_program.handle; - if (handle == 0) { - handle = program->source_program.handle; - } - if (is_built) { - ASSERT(handle != 0); - } -} +Shader::Shader() = default; Shader::~Shader() = default; -GLuint Shader::GetHandle() const { - DEBUG_ASSERT(registry->IsConsistent()); - return handle; -} - -bool Shader::IsBuilt() const { - return is_built; -} - -void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { - program->source_program = std::move(new_program); - handle = program->source_program.handle; - is_built = true; -} - -void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { - program->assembly_program = std::move(new_program); - handle = program->assembly_program.handle; - is_built = true; -} - -std::unique_ptr Shader::CreateStageFromMemory( - 
const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { - const auto shader_type = GetShaderType(program_type); - - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(shader_type, gpu.Maxwell3D()); - if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = - BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, shader_type), - std::move(program), true)); - } else { - // Required for entries - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto entries = MakeEntries(params.device, ir, shader_type); - - async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, - std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, - COMPILER_SETTINGS, *registry, cpu_addr); - - auto program = std::make_shared(); - return std::unique_ptr( - new Shader(std::move(registry), std::move(entries), std::move(program), false)); - } -} - -std::unique_ptr 
Shader::CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code) { - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(ShaderType::Compute, params.engine); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - const u64 uid = params.unique_identifier; - auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); - - ShaderDiskCacheEntry entry; - entry.type = ShaderType::Compute; - entry.code = std::move(code); - entry.unique_identifier = uid; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.compute_info = registry->GetComputeInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, ShaderType::Compute), - std::move(program))); -} - -std::unique_ptr Shader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader) { - return std::unique_ptr(new Shader( - precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); -} - ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; -void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - disk_cache.BindTitleID(title_id); - const std::optional transferable = disk_cache.LoadTransferable(); - - LOG_INFO(Render_OpenGL, "Total Shader Count: {}", - transferable.has_value() ? 
transferable->size() : 0); - - if (!transferable) { - return; - } - - std::vector gl_cache; - if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { - // Only load precompiled cache when we are not using assembly shaders - gl_cache = disk_cache.LoadPrecompiled(); - } - const auto supported_formats = GetSupportedFormats(); - - // Track if precompiled cache was altered during loading to know if we have to - // serialize the virtual precompiled cache file back to the hard drive - bool precompiled_cache_altered = false; - - // Inform the frontend about shader build initialization - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); - } - - std::mutex mutex; - std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool gl_cache_failed = false; - - const auto find_precompiled = [&gl_cache](u64 id) { - return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); - }; - - const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { - const auto scope = context->Acquire(); - - for (std::size_t i = begin; i < end; ++i) { - if (stop_loading.stop_requested()) { - return; - } - const auto& entry = (*transferable)[i]; - const u64 uid = entry.unique_identifier; - const auto it = find_precompiled(uid); - const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; - - const bool is_compute = entry.type == ShaderType::Compute; - const u32 main_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - - ProgramSharedPtr program; - if (precompiled_entry) { - // If the shader is precompiled, attempt to load it with - program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); - if (!program) { - gl_cache_failed = true; - } - } - if (!program) { - // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, uid, ir, *registry, true); - } - - PrecompiledShader shader; - shader.program = std::move(program); - shader.registry = std::move(registry); - shader.entries = MakeEntries(device, ir, entry.type); - - std::scoped_lock lock{mutex}; - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - transferable->size()); - } - runtime_cache.emplace(entry.unique_identifier, std::move(shader)); - } - }; - - const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; - const std::size_t bucket_size{transferable->size() / num_workers}; - std::vector> contexts(num_workers); - std::vector threads(num_workers); - for (std::size_t i = 0; i < num_workers; ++i) { - const bool is_last_worker = i + 1 == num_workers; - const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? 
transferable->size() : start + bucket_size}; - - // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(worker, contexts[i].get(), start, end); - } - for (auto& thread : threads) { - thread.join(); - } - - if (gl_cache_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - return; - } - if (stop_loading.stop_requested()) { - return; - } - - if (device.UseAssemblyShaders() || device.UseDriverCache()) { - // Don't store precompiled binaries for assembly shaders or when using the driver cache - return; - } - - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw - // before precompiling them - - for (std::size_t i = 0; i < transferable->size(); ++i) { - const u64 id = (*transferable)[i].unique_identifier; - const auto it = find_precompiled(id); - if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->source_program.handle; - disk_cache.SavePrecompiled(id, program); - precompiled_cache_altered = true; - } - } - - if (precompiled_cache_altered) { - disk_cache.SaveVirtualPrecompiledFile(); - } -} - -ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats) { - if (!supported_formats.contains(precompiled_entry.binary_format)) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); - return {}; - } - - auto program = std::make_shared(); - GLuint& handle = program->source_program.handle; - handle = glCreateProgram(); - glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), - static_cast(precompiled_entry.binary.size())); - - GLint link_status; - 
glGetProgramiv(handle, GL_LINK_STATUS, &link_status); - if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); - return {}; - } - - return program; -} - -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!maxwell3d.dirty.flags[Dirty::Shaders]) { - auto* last_shader = last_shaders[static_cast(program)]; - if (last_shader->IsBuilt()) { - return last_shader; - } - } - - const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; - - if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { - auto completed_work = async_shaders.GetCompletedWork(); - for (auto& work : completed_work) { - Shader* shader = TryGet(work.cpu_address); - gpu.ShaderNotify().MarkShaderComplete(); - if (shader == nullptr) { - continue; - } - using namespace VideoCommon::Shader; - if (work.backend == AsyncShaders::Backend::OpenGL) { - shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); - } else if (work.backend == AsyncShaders::Backend::GLASM) { - shader->AsyncGLASMBuilt(std::move(work.program.glasm)); - } - - auto& registry = shader->GetRegistry(); - - ShaderDiskCacheEntry entry; - entry.type = work.shader_type; - entry.code = std::move(work.code); - entry.code_b = std::move(work.code_b); - entry.unique_identifier = work.uid; - entry.bound_buffer = registry.GetBoundBuffer(); - entry.graphics_info = registry.GetGraphicsInfo(); - entry.keys = registry.GetKeys(); - entry.bound_samplers = registry.GetBoundSamplers(); - entry.bindless_samplers = registry.GetBindlessSamplers(); - disk_cache.SaveEntry(std::move(entry)); - } - } - - // Look up shader in the cache based on address - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(address)}; - if (Shader* const shader{cpu_addr ? 
TryGet(*cpu_addr) : null_shader.get()}) { - return last_shaders[static_cast(program)] = shader; - } - - const u8* const host_ptr{gpu_memory.GetPointer(address)}; - - // No shader found - create a new one - ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; - ProgramCode code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = gpu_memory.GetPointer(address_b); - code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); - } - const std::size_t code_size = code.size() * sizeof(u64); - - const u64 unique_identifier = GetUniqueIdentifier( - GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - - const ShaderParameters params{gpu, maxwell3d, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr shader; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), - async_shaders, cpu_addr.value_or(0)); - } else { - shader = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = shader.get(); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code_size); - } else { - null_shader = std::move(shader); - } - - return last_shaders[static_cast(program)] = result; -} - -Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; - - if (Shader* const kernel = cpu_addr ? 
TryGet(*cpu_addr) : null_kernel.get()) { - return kernel; - } - - // No kernel found, create a new one - const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; - ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; - const std::size_t code_size{code.size() * sizeof(u64)}; - const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - - const ShaderParameters params{gpu, kepler_compute, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr kernel; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - kernel = Shader::CreateKernelFromMemory(params, std::move(code)); - } else { - kernel = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = kernel.get(); - if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code_size); - } else { - null_kernel = std::move(kernel); - } - return result; -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..ad3d15a76 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -19,10 +19,6 @@ #include "common/common_types.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -33,10 +29,6 @@ namespace Core::Frontend { class EmuWindow; } -namespace VideoCommon::Shader { -class AsyncShaders; -} - namespace OpenGL { class Device; @@ -44,77 +36,10 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct ProgramHandle { - OGLProgram source_program; - OGLAssemblyProgram assembly_program; -}; -using 
ProgramSharedPtr = std::shared_ptr; - -struct PrecompiledShader { - ProgramSharedPtr program; - std::shared_ptr registry; - ShaderEntries entries; -}; - -struct ShaderParameters { - Tegra::GPU& gpu; - Tegra::Engines::ConstBufferEngineInterface& engine; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - VAddr cpu_addr; - const u8* host_ptr; - u64 unique_identifier; -}; - -ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, - u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - bool hint_retrievable = false); - -class Shader final { +class Shader { public: + explicit Shader(); ~Shader(); - - /// Gets the GL program handle for the shader - GLuint GetHandle() const; - - bool IsBuilt() const; - - /// Gets the shader entries for the shader - const ShaderEntries& GetEntries() const { - return entries; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return *registry; - } - - /// Mark a OpenGL shader as built - void AsyncOpenGLBuilt(OGLProgram new_program); - - /// Mark a GLASM shader as built - void AsyncGLASMBuilt(OGLAssemblyProgram new_program); - - static std::unique_ptr CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b, - VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); - - static std::unique_ptr CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code); - - static std::unique_ptr CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader); - -private: - explicit Shader(std::shared_ptr registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built_ = true); - - std::shared_ptr registry; - ShaderEntries entries; - ProgramSharedPtr program; - GLuint handle = 0; - bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { @@ -126,36 
+51,13 @@ public: Tegra::MemoryManager& gpu_memory_, const Device& device_); ~ShaderCacheOpenGL() override; - /// Loads disk cache for the current game - void LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback); - - /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders); - - /// Gets a compute kernel in the passed address - Shader* GetComputeKernel(GPUVAddr code_addr); - private: - ProgramSharedPtr GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats); - Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; - - ShaderDiskCacheOpenGL disk_cache; - std::unordered_map runtime_cache; - - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null @@ -1,2986 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/div_ceil.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::Header; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::IpaMode; -using Tegra::Shader::IpaSampleMode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::TextureType; - -using namespace VideoCommon::Shader; -using namespace std::string_literals; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; - -constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; -constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; - -struct TextureOffset {}; -struct TextureDerivates {}; -using TextureArgument = std::pair; -using TextureIR = std::variant; - -constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast(Maxwell::MaxConstBufferSize) / sizeof(u32); -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); - -constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt -#define ftou floatBitsToUint -#define itof intBitsToFloat -#define utof uintBitsToFloat - -bvec2 
HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ - bvec2 is_nan1 = isnan(pair1); - bvec2 is_nan2 = isnan(pair2); - return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); -}} - -const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); -const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); -)"; - -class ShaderWriter final { -public: - void AddExpression(std::string_view text) { - DEBUG_ASSERT(scope >= 0); - if (!text.empty()) { - AppendIndentation(); - } - shader_source += text; - } - - // Forwards all arguments directly to libfmt. - // Note that all formatting requirements for fmt must be - // obeyed when using this function. (e.g. {{ must be used - // printing the character '{' is desirable. Ditto for }} and '}', - // etc). - template - void AddLine(std::string_view text, Args&&... args) { - AddExpression(fmt::format(fmt::runtime(text), std::forward(args)...)); - AddNewLine(); - } - - void AddNewLine() { - DEBUG_ASSERT(scope >= 0); - shader_source += '\n'; - } - - std::string GenerateTemporary() { - return fmt::format("tmp{}", temporary_index++); - } - - std::string GetResult() { - return std::move(shader_source); - } - - s32 scope = 0; - -private: - void AppendIndentation() { - shader_source.append(static_cast(scope) * 4, ' '); - } - - std::string shader_source; - u32 temporary_index = 1; -}; - -class Expression final { -public: - Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { - ASSERT(type != Type::Void); - } - Expression() : type{Type::Void} {} - - Type GetType() const { - return type; - } - - std::string GetCode() const { - return code; - } - - void CheckVoid() const { - ASSERT(type == Type::Void); - } - - std::string As(Type type_) const { - switch (type_) { - case Type::Bool: - return AsBool(); - case Type::Bool2: - return AsBool2(); - case Type::Float: - return AsFloat(); - case Type::Int: - return AsInt(); - case Type::Uint: - 
return AsUint(); - case Type::HalfFloat: - return AsHalfFloat(); - default: - UNREACHABLE_MSG("Invalid type"); - return code; - } - } - - std::string AsBool() const { - switch (type) { - case Type::Bool: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsBool2() const { - switch (type) { - case Type::Bool2: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsFloat() const { - switch (type) { - case Type::Float: - return code; - case Type::Uint: - return fmt::format("utof({})", code); - case Type::Int: - return fmt::format("itof({})", code); - case Type::HalfFloat: - return fmt::format("utof(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsInt() const { - switch (type) { - case Type::Float: - return fmt::format("ftoi({})", code); - case Type::Uint: - return fmt::format("int({})", code); - case Type::Int: - return code; - case Type::HalfFloat: - return fmt::format("int(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsUint() const { - switch (type) { - case Type::Float: - return fmt::format("ftou({})", code); - case Type::Uint: - return code; - case Type::Int: - return fmt::format("uint({})", code); - case Type::HalfFloat: - return fmt::format("packHalf2x16({})", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsHalfFloat() const { - switch (type) { - case Type::Float: - return fmt::format("unpackHalf2x16(ftou({}))", code); - case Type::Uint: - return fmt::format("unpackHalf2x16({})", code); - case Type::Int: - return fmt::format("unpackHalf2x16(int({}))", code); - case Type::HalfFloat: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - -private: - std::string code; - Type type{}; -}; - -const char* GetTypeString(Type type) { - 
switch (type) { - case Type::Bool: - return "bool"; - case Type::Bool2: - return "bvec2"; - case Type::Float: - return "float"; - case Type::Int: - return "int"; - case Type::Uint: - return "uint"; - case Type::HalfFloat: - return "vec2"; - default: - UNREACHABLE_MSG("Invalid type"); - return ""; - } -} - -const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } -} - -/// Describes primitive behavior on geometry shaders -std::pair GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { - switch (topology) { - case Maxwell::PrimitiveTopology::Points: - return {"points", 1}; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineStrip: - return {"lines", 2}; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return {"lines_adjacency", 4}; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return {"triangles", 3}; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return {"triangles_adjacency", 6}; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return {"points", 1}; - } -} - -/// Generates code to use for a swizzle operation. 
-constexpr const char* GetSwizzle(std::size_t element) { - constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; - return swizzle.at(element); -} - -constexpr const char* GetColorSwizzle(std::size_t element) { - constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; - return swizzle.at(element); -} - -/// Translate topology -std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "points"; - case Tegra::Shader::OutputTopology::LineStrip: - return "line_strip"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "triangle_strip"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (const auto arithmetic = std::get_if(&meta)) { - return arithmetic->precise; - } - return false; -} - -bool IsPrecise(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - return IsPrecise(*operation); - } - return false; -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -constexpr bool IsLegacyTexCoord(Attribute::Index index) { - return static_cast(index) >= static_cast(Attribute::Index::TexCoord_0) && - static_cast(index) <= static_cast(Attribute::Index::TexCoord_7); -} - -constexpr Attribute::Index ToGenericAttribute(u64 value) { - return static_cast(value + static_cast(Attribute::Index::Attribute_0)); -} - -constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { - return static_cast(index) - static_cast(Attribute::Index::TexCoord_0); -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -constexpr const char* 
GetFlowStackPrefix(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "ssy"; - case MetaStackClass::Pbk: - return "pbk"; - } - return {}; -} - -std::string FlowStackName(MetaStackClass stack) { - return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); -} - -std::string FlowStackTopName(MetaStackClass stack) { - return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); -} - -struct GenericVaryingDescription { - std::string name; - u8 first_element = 0; - bool is_scalar = false; -}; - -class GLSLDecompiler final { -public: - explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier_, - std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, - identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); - } - } - - void Decompile() { - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareImages(); - DeclareSamplers(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareLocalMemory(); - DeclareRegisters(); - DeclarePredicates(); - DeclareInternalFlags(); - DeclareCustomVariables(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void main() {{"); - ++code.scope; - - if (stage == ShaderType::Vertex) { - code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - } - - std::string GetResult() { - return code.GetResult(); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - void DecompileBranchMode() { - // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; - 
code.AddLine("uint jmp_to = {}U;", first_address); - - // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems - // unlikely that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); - } - } - - code.AddLine("while (true) {{"); - ++code.scope; - - code.AddLine("switch (jmp_to) {{"); - - for (const auto& pair : ir.GetBasicBlocks()) { - const auto& [address, bb] = pair; - code.AddLine("case 0x{:X}U: {{", address); - ++code.scope; - - VisitBlock(bb); - - --code.scope; - code.AddLine("}}"); - } - - code.AddLine("default: return;"); - code.AddLine("}}"); - - --code.scope; - code.AddLine("}}"); - } - - void DecompileAST(); - - void DeclareHeader() { - if (!identifier.empty()) { - code.AddLine("// {}", identifier); - } - const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); - code.AddLine("#version 440 {}", use_compatibility ? 
"compatibility" : "core"); - code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); - if (device.HasShaderBallot()) { - code.AddLine("#extension GL_ARB_shader_ballot : require"); - } - if (device.HasVertexViewportLayer()) { - code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); - } - if (device.HasImageLoadFormatted()) { - code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); - } - if (device.HasTextureShadowLod()) { - code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); - } - if (device.HasWarpIntrinsics()) { - code.AddLine("#extension GL_NV_gpu_shader5 : require"); - code.AddLine("#extension GL_NV_shader_thread_group : require"); - code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 - // operations) on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - code.AddLine("#pragma optionNV(fastmath off)"); - - code.AddNewLine(); - - code.AddLine(COMMON_DECLARATIONS); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - - DeclareVertexRedeclarations(); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - - const auto& info = registry.GetGraphicsInfo(); - const auto input_topology = info.primitive_topology; - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); - max_input_vertices = max_vertices; - code.AddLine("layout ({}) in;", glsl_topology); - - const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_output_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); - code.AddNewLine(); - - code.AddLine("in gl_PerVertex {{"); - ++code.scope; - code.AddLine("vec4 gl_Position;"); - --code.scope; - code.AddLine("}} gl_in[];"); - - DeclareVertexRedeclarations(); - } - 
- void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - if (ir.UsesLegacyVaryings()) { - code.AddLine("in gl_PerFragment {{"); - ++code.scope; - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_Color;"); - code.AddLine("vec4 gl_SecondaryColor;"); - --code.scope; - code.AddLine("}};"); - } - - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); - } - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const auto& info = registry.GetComputeInfo(); - if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (size > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size, limit); - size = limit; - } - - code.AddLine("shared uint smem[{}];", size / 4); - code.AddNewLine(); - } - code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", - info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - code.AddNewLine(); - } - - void DeclareVertexRedeclarations() { - code.AddLine("out gl_PerVertex {{"); - ++code.scope; - - auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); - if (!pos_xfb.empty()) { - pos_xfb = fmt::format("layout ({}) ", pos_xfb); - } - const char* pos_type = - FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); - code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); - - for (const auto attribute : ir.GetOutputAttributes()) { - if (attribute == Attribute::Index::ClipDistances0123 || - attribute == Attribute::Index::ClipDistances4567) { - code.AddLine("float gl_ClipDistance[];"); - break; - } - } - - if (stage != ShaderType::Geometry && - (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { - if (ir.UsesLayer()) { - code.AddLine("int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - 
code.AddLine("int gl_ViewportIndex;"); - } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && - !device.HasVertexViewportLayer()) { - LOG_ERROR( - Render_OpenGL, - "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); - } - - if (ir.UsesPointSize()) { - code.AddLine("float gl_PointSize;"); - } - - if (ir.UsesLegacyVaryings()) { - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_FrontColor;"); - code.AddLine("vec4 gl_FrontSecondaryColor;"); - code.AddLine("vec4 gl_BackColor;"); - code.AddLine("vec4 gl_BackSecondaryColor;"); - } - - --code.scope; - code.AddLine("}};"); - code.AddNewLine(); - - if (stage == ShaderType::Geometry) { - if (ir.UsesLayer()) { - code.AddLine("out int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("out int gl_ViewportIndex;"); - } - } - code.AddNewLine(); - } - - void DeclareRegisters() { - const auto& registers = ir.GetRegisters(); - for (const u32 gpr : registers) { - code.AddLine("float {} = 0.0f;", GetRegister(gpr)); - } - if (!registers.empty()) { - code.AddNewLine(); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); - } - if (num_custom_variables > 0) { - code.AddNewLine(); - } - } - - void DeclarePredicates() { - const auto& predicates = ir.GetPredicates(); - for (const auto pred : predicates) { - code.AddLine("bool {} = false;", GetPredicate(pred)); - } - if (!predicates.empty()) { - code.AddNewLine(); - } - } - - void DeclareLocalMemory() { - u64 local_memory_size = 0; - if (stage == ShaderType::Compute) { - local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - local_memory_size = header.GetLocalMemorySize(); - } - if (local_memory_size == 0) { - return; - } - const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; - 
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); - code.AddNewLine(); - } - - void DeclareInternalFlags() { - for (u32 flag = 0; flag < static_cast(InternalFlag::Amount); flag++) { - const auto flag_code = static_cast(flag); - code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); - } - code.AddNewLine(); - } - - const char* GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return "smooth"; - case PixelImap::Constant: - return "flat"; - case PixelImap::ScreenLinear: - return "noperspective"; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; - } - - void DeclareInputAttributes() { - if (ir.HasPhysicalAttributes()) { - const u32 num_inputs{GetNumPhysicalInputAttributes()}; - for (u32 i = 0; i < num_inputs; ++i) { - DeclareInputAttribute(ToGenericAttribute(i), true); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetInputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareInputAttribute(index, false); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 location{GetGenericAttributeIndex(index)}; - - std::string name{GetGenericInputAttribute(index)}; - if (stage == ShaderType::Geometry) { - name = "gs_" + name + "[]"; - } - - std::string suffix_; - if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetPixelImap(location)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix_ = GetInputFlags(input_mode); - } - - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); - } - - void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { - for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { - DeclareOutputAttribute(ToGenericAttribute(i)); - } - code.AddNewLine(); - return; - } - 
- const auto& attributes = ir.GetOutputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareOutputAttribute(index); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - std::optional GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return std::nullopt; - } - return it->second.components; - } - - std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - - const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, - tfb.offset, tfb.stride); - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - u8 element = 0; - while (element < 4) { - auto xfb = GetTransformFeedbackDecoration(index, element); - if (!xfb.empty()) { - xfb = fmt::format(", {}", xfb); - } - const std::size_t remainder = 4 - element; - const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); - const char* const type = FLOAT_TYPES.at(num_components - 1); - - const u32 location = GetGenericAttributeIndex(index); - - GenericVaryingDescription description; - description.first_element = static_cast(element); - description.is_scalar = num_components == 1; - description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); - if (element != 0 || num_components != 4) { - const std::string_view name_swizzle = swizzle.substr(element, num_components); - description.name = fmt::format("{}_{}", description.name, name_swizzle); - } - for (std::size_t i = 0; i < num_components; ++i) { - const u8 offset = 
static_cast(location * 4 + element + i); - varying_description.insert({offset, description}); - } - - code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, - xfb, type, description.name); - - element = static_cast(static_cast(element) + num_components); - } - } - - void DeclareConstantBuffers() { - u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, info] : ir.GetConstantBuffers()) { - const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); - const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, - GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareGlobalMemory() { - u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - // Since we don't know how the shader will use the shader, hint the driver to disable as - // much optimizations as possible - std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) { - qualifier += " readonly"; - } else if (usage.is_written && !usage.is_read) { - qualifier += " writeonly"; - } - - code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, - GetGlobalMemoryBlock(base)); - code.AddLine(" uint {}[];", GetGlobalMemory(base)); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareSamplers() { - u32 binding = device.GetBaseBindings(stage).sampler; - for (const auto& sampler : ir.GetSamplers()) { - const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); - binding += sampler.is_indexed ? 
sampler.size : 1; - - std::string sampler_type = [&]() { - if (sampler.is_buffer) { - return "samplerBuffer"; - } - switch (sampler.type) { - case TextureType::Texture1D: - return "sampler1D"; - case TextureType::Texture2D: - return "sampler2D"; - case TextureType::Texture3D: - return "sampler3D"; - case TextureType::TextureCube: - return "samplerCube"; - default: - UNREACHABLE(); - return "sampler2D"; - } - }(); - if (sampler.is_array) { - sampler_type += "Array"; - } - if (sampler.is_shadow) { - sampler_type += "Shadow"; - } - - if (!sampler.is_indexed) { - code.AddLine("{} {} {};", description, sampler_type, name); - } else { - code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); - } - } - if (!ir.GetSamplers().empty()) { - code.AddNewLine(); - } - } - - void DeclarePhysicalAttributeReader() { - if (!ir.HasPhysicalAttributes()) { - return; - } - code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); - ++code.scope; - code.AddLine("switch (physical_address) {{"); - - // Just declare generic attributes for now. - const auto num_attributes{static_cast(GetNumPhysicalInputAttributes())}; - for (u32 index = 0; index < num_attributes; ++index) { - const auto attribute{ToGenericAttribute(index)}; - for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base = 0x80; - constexpr u32 generic_stride = 16; - constexpr u32 element_stride = 4; - const u32 address{generic_base + index * generic_stride + element * element_stride}; - - const bool declared = stage != ShaderType::Fragment || - header.ps.GetPixelImap(index) != PixelImap::Unused; - const std::string value = - declared ? 
ReadAttribute(attribute, element).AsFloat() : "0.0f"; - code.AddLine("case 0x{:X}U: return {};", address, value); - } - } - - code.AddLine("default: return 0;"); - - code.AddLine("}}"); - --code.scope; - code.AddLine("}}"); - code.AddNewLine(); - } - - void DeclareImages() { - u32 binding = device.GetBaseBindings(stage).image; - for (const auto& image : ir.GetImages()) { - std::string qualifier = "coherent volatile"; - if (image.is_read && !image.is_written) { - qualifier += " readonly"; - } else if (image.is_written && !image.is_read) { - qualifier += " writeonly"; - } - - const char* format = image.is_atomic ? "r32ui, " : ""; - const char* type_declaration = GetImageTypeDeclaration(image.type); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, - qualifier, type_declaration, GetImage(image)); - } - if (!ir.GetImages().empty()) { - code.AddNewLine(); - } - } - - void VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node).CheckVoid(); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - const auto operation_index = static_cast(operation->GetCode()); - if (operation_index >= operation_decompilers.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); - return {}; - } - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", operation_index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {"0U", Type::Uint}; - } - return {GetRegister(index), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {GetCustomVariable(index), Type::Float}; - } - 
- if (const auto immediate = std::get_if(&*node)) { - const u32 value = immediate->GetValue(); - if (value < 10) { - // For eyecandy avoid using hex numbers on single digits - return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; - } - return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> std::string { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return "true"; - case Tegra::Shader::Pred::NeverExecute: - return "false"; - default: - return GetPredicate(index); - } - }(); - if (predicate->IsNegated()) { - return {fmt::format("!({})", value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, - "Physical attributes in geometry shaders are not implemented"); - if (abuf->IsPhysicalBuffer()) { - return {fmt::format("ReadPhysicalAttribute({})", - Visit(abuf->GetPhysicalAddress()).AsUint()), - Type::Float}; - } - return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node offset = cbuf->GetOffset(); - - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } - - // AMD's proprietary GLSL compiler emits 
ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, - GetSwizzle(swizzle)); - } - return {result, Type::Uint}; - } - - if (const auto gmem = std::get_if(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } - - if (const auto smem = std::get_if(&*node)) { - return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); - ++code.scope; - - VisitBlock(conditional->GetCode()); - - --code.scope; - code.AddLine("}}"); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - code.AddLine("// " + comment->GetText()); - return {}; - } - - UNREACHABLE(); - return {}; - } - - Expression ReadAttribute(Attribute::Index 
attribute, u32 element, const Node& buffer = {}) { - const auto GeometryPass = [&](std::string_view name) { - if (stage == ShaderType::Geometry && buffer) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), - max_input_vertices.value()); - } - return std::string(name); - }; - - switch (attribute) { - case Attribute::Index::Position: - switch (stage) { - case ShaderType::Geometry: - return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), - GetSwizzle(element)), - Type::Float}; - case ShaderType::Fragment: - return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; - default: - UNREACHABLE(); - return {"0", Type::Int}; - } - case Attribute::Index::FrontColor: - return {"gl_Color"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::FrontSecondaryColor: - return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return {"gl_PointCoord.x", Type::Float}; - case 1: - return {"gl_PointCoord.y", Type::Float}; - case 2: - case 3: - return {"0.0f", Type::Float}; - } - UNREACHABLE(); - return {"0", Type::Int}; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. 
- return {"gl_InstanceID", Type::Int}; - case 3: - return {"gl_VertexID", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {"0", Type::Int}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - switch (element) { - case 3: - return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {"0", Type::Int}; - default: - if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), - Type::Float}; - } - if (IsLegacyTexCoord(attribute)) { - UNIMPLEMENTED_IF(stage == ShaderType::Geometry); - return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}; - } - break; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {"0", Type::Int}; - } - - Expression ApplyPrecise(Operation operation, std::string value, Type type) { - if (!IsPrecise(operation)) { - return {std::move(value), type}; - } - // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to - // be found in fragment shaders, so we disable precise there. There are vertex shaders that - // also fail to build but nobody seems to care about those. - // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; - - std::string temporary = code.GenerateTemporary(); - code.AddLine("{}{} {} = {};", disable_precise ? 
"" : "precise ", GetTypeString(type), - temporary, value); - return {std::move(temporary), type}; - } - - Expression VisitOperand(Operation operation, std::size_t operand_index) { - const auto& operand = operation[operand_index]; - const bool parent_precise = IsPrecise(operation); - const bool child_precise = IsPrecise(operand); - const bool child_trivial = !std::holds_alternative(*operand); - if (!parent_precise || child_precise || child_trivial) { - return Visit(operand); - } - - Expression value = Visit(operand); - std::string temporary = code.GenerateTemporary(); - code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); - return {std::move(temporary), value.GetType()}; - } - - std::optional GetOutputAttribute(const AbufNode* abuf) { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex()) { - case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return std::nullopt; - case 1: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_Layer", Type::Int}}; - case 2: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_ViewportIndex", Type::Int}}; - case 3: - return {{"gl_PointSize", Type::Float}}; - } - return std::nullopt; - case Attribute::Index::FrontColor: - return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::FrontSecondaryColor: - return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackColor: - return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackSecondaryColor: - return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::ClipDistances0123: - return 
{{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; - case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; - default: - if (IsGenericAttribute(attribute)) { - return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; - } - if (IsLegacyTexCoord(attribute)) { - return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); - return std::nullopt; - } - } - - Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, - Type type_a) { - std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - std::string op_str = 
fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - const std::string op_d = VisitOperand(operation, 3).As(type_d); - std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector& extras, bool separate_dc = false) { - constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; - - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::size_t count = operation.GetOperandsCount(); - const bool has_array = meta->sampler.is_array; - const bool has_shadow = meta->sampler.is_shadow; - const bool workaround_lod_array_shadow_as_grad = - !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube); - - std::string expr = "texture"; - - if (workaround_lod_array_shadow_as_grad) { - expr += "Grad"; - } else { - expr += function_suffix; - } - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } else if (!meta->ptp.empty()) { - expr += "Offsets"; - } - if (!meta->sampler.is_indexed) { - expr += '(' + GetSampler(meta->sampler) + ", "; - } else { - expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; - } - expr += coord_constructors.at(count + (has_array ? 1 : 0) + - (has_shadow && !separate_dc ? 
1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]).AsFloat(); - - const std::size_t next = i + 1; - if (next < count) - expr += ", "; - } - if (has_array) { - expr += ", float(" + Visit(meta->array).AsInt() + ')'; - } - if (has_shadow) { - if (separate_dc) { - expr += "), " + Visit(meta->depth_compare).AsFloat(); - } else { - expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; - } - } else { - expr += ')'; - } - - if (workaround_lod_array_shadow_as_grad) { - switch (meta->sampler.type) { - case TextureType::Texture2D: - return expr + ", vec2(0.0), vec2(0.0))"; - case TextureType::TextureCube: - return expr + ", vec3(0.0), vec3(0.0))"; - default: - UNREACHABLE(); - break; - } - } - - for (const auto& variant : extras) { - if (const auto argument = std::get_if(&variant)) { - expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative(variant)) { - if (!meta->aoffi.empty()) { - expr += GenerateTextureAoffi(meta->aoffi); - } else if (!meta->ptp.empty()) { - expr += GenerateTexturePtp(meta->ptp); - } - } else if (std::holds_alternative(variant)) { - expr += GenerateTextureDerivates(meta->derivates); - } else { - UNREACHABLE(); - } - } - - return expr + ')'; - } - - std::string GenerateTextureArgument(const TextureArgument& argument) { - const auto& [type, operand] = argument; - if (operand == nullptr) { - return {}; - } - - std::string expr = ", "; - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if(&*operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast(immediate->GetValue())); - } else { - expr += Visit(operand).AsInt(); - } - break; - case Type::Float: - expr += Visit(operand).AsFloat(); - break; - default: { - const auto type_int = static_cast(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; - } - } - return expr; - } 
- - std::string ReadTextureOffset(const Node& value) { - if (const auto immediate = std::get_if(&*value)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - return std::to_string(static_cast(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - return Visit(value).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. - return "0"; - } - } - - std::string GenerateTextureAoffi(const std::vector& aoffi) { - if (aoffi.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; - std::string expr = ", "; - expr += coord_constructors.at(aoffi.size() - 1); - expr += '('; - - for (std::size_t index = 0; index < aoffi.size(); ++index) { - expr += ReadTextureOffset(aoffi.at(index)); - if (index + 1 < aoffi.size()) { - expr += ", "; - } - } - expr += ')'; - - return expr; - } - - std::string GenerateTexturePtp(const std::vector& ptp) { - static constexpr std::size_t num_vectors = 4; - ASSERT(ptp.size() == num_vectors * 2); - - std::string expr = ", ivec2[]("; - for (std::size_t vector = 0; vector < num_vectors; ++vector) { - const bool has_next = vector + 1 < num_vectors; - expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), - ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? 
", " : ""); - } - expr += ')'; - return expr; - } - - std::string GenerateTextureDerivates(const std::vector& derivates) { - if (derivates.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; - std::string expr = ", "; - const std::size_t components = derivates.size() / 2; - std::string dx = coord_constructors.at(components - 1); - std::string dy = coord_constructors.at(components - 1); - dx += '('; - dy += '('; - - for (std::size_t index = 0; index < components; ++index) { - const auto& operand_x{derivates.at(index * 2)}; - const auto& operand_y{derivates.at(index * 2 + 1)}; - dx += Visit(operand_x).AsFloat(); - dy += Visit(operand_y).AsFloat(); - - if (index + 1 < components) { - dx += ", "; - dy += ", "; - } - } - dx += ')'; - dy += ')'; - expr += dx + ", " + dy; - - return expr; - } - - std::string BuildIntegerCoordinates(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; - const std::size_t coords_count{operation.GetOperandsCount()}; - std::string expr = constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - std::string BuildImageValues(Operation operation) { - constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto& meta{std::get(operation.GetMeta())}; - - const std::size_t values_count{meta.values.size()}; - std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsUint(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == 
Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit the source - // as it might have side effects. - code.AddLine("{};", Visit(src).GetCode()); - return {}; - } - target = {GetRegister(gpr->GetIndex()), Type::Float}; - } else if (const auto abuf = std::get_if(&*dest)) { - UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - auto output = GetOutputAttribute(abuf); - if (!output) { - return {}; - } - target = std::move(*output); - } else if (const auto lmem = std::get_if(&*dest)) { - target = { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } else if (const auto smem = std::get_if(&*dest)) { - ASSERT(stage == ShaderType::Compute); - target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } else if (const auto gmem = std::get_if(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } else if (const auto cv = std::get_if(&*dest)) { - target = {GetCustomVariable(cv->GetIndex()), Type::Float}; - } else { - UNREACHABLE_MSG("Assign called without a proper target"); - } - - code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); - return {}; - } - - template - Expression Add(Operation operation) { - return GenerateBinaryInfix(operation, "+", type, type, type); - } - - template - Expression Mul(Operation operation) { - return GenerateBinaryInfix(operation, "*", type, type, type); - } - - template - Expression Div(Operation operation) { - return GenerateBinaryInfix(operation, "/", type, type, type); - } - - template - Expression Fma(Operation operation) { - return GenerateTernary(operation, "fma", type, type, type, type); - } - - template - Expression 
Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type); - } - - template - Expression Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type); - } - - Expression FClamp(Operation operation) { - return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, - Type::Float); - } - - Expression FCastHalf0(Operation operation) { - return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression FCastHalf1(Operation operation) { - return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - template - Expression Min(Operation operation) { - return GenerateBinaryCall(operation, "min", type, type, type); - } - - template - Expression Max(Operation operation) { - return GenerateBinaryCall(operation, "max", type, type, type); - } - - Expression Select(Operation operation) { - const std::string condition = Visit(operation[0]).AsBool(); - const std::string true_case = Visit(operation[1]).AsUint(); - const std::string false_case = Visit(operation[2]).AsUint(); - std::string op_str = fmt::format("({} ? 
{} : {})", condition, true_case, false_case); - - return ApplyPrecise(operation, std::move(op_str), Type::Uint); - } - - Expression FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float); - } - - Expression FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float); - } - - Expression FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float); - } - - Expression FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float); - } - - Expression FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); - } - - Expression FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); - } - - Expression FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); - } - - Expression FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float); - } - - Expression FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float); - } - - Expression FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float); - } - - template - Expression FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type); - } - - Expression FSwizzleAdd(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsFloat(); - const std::string op_b = VisitOperand(operation, 1).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {fmt::format("{} + {}", op_a, op_b), Type::Float}; - } - - const std::string instr_mask = VisitOperand(operation, 2).AsUint(); - const std::string mask = code.GenerateTemporary(); - code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) 
<< 1)) & 3;", mask, - instr_mask); - - const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); - const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); - return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), - Type::Float}; - } - - Expression ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float); - } - - Expression ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint); - } - - template - Expression LogicalShiftLeft(Operation operation) { - return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); - } - - Expression ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - const std::string op_b = VisitOperand(operation, 1).AsUint(); - std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), Type::Int); - } - - Expression IArithmeticShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); - } - - template - Expression BitwiseAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&", type, type, type); - } - - template - Expression BitwiseOr(Operation operation) { - return GenerateBinaryInfix(operation, "|", type, type, type); - } - - template - Expression BitwiseXor(Operation operation) { - return GenerateBinaryInfix(operation, "^", type, type, type); - } - - template - Expression BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type); - } - - Expression UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float); - } - - Expression UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int); - } - - Expression UShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Uint, 
Type::Uint, Type::Uint); - } - - template - Expression BitfieldInsert(Operation operation) { - return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, - Type::Int); - } - - template - Expression BitfieldExtract(Operation operation) { - return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); - } - - template - Expression BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type); - } - - template - Expression BitMSB(Operation operation) { - return GenerateUnary(operation, "findMSB", type, type); - } - - Expression HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; - }; - return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), - GetNegate(1), GetNegate(2)), - Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsHalfFloat(); - const std::string min = VisitOperand(operation, 1).AsFloat(); - const std::string max = VisitOperand(operation, 2).AsFloat(); - std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); - } - - Expression HCastFloat(Operation operation) { - return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), - Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = VisitOperand(operation, 0); - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H0_H0: - return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H1_H1: - return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), 
Type::HalfFloat}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression HMergeF32(Operation operation) { - return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression HMergeH0(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), - Type::HalfFloat}; - } - - Expression HMergeH1(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), - Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::HalfFloat}; - } - - template - Expression Comparison(Operation operation) { - static_assert(!unordered || type == Type::Float); - - Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); - - if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { - // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's - // and Nvidia's proprietary stacks. Manually force an ordered comparison. - return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - if constexpr (!unordered) { - return expr; - } - // Unordered comparisons are always true for NaN operands. 
- return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FOrdered(Operation operation) { - return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FUnordered(Operation operation) { - return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression LogicalAddCarry(Operation operation) { - const std::string carry = code.GenerateTemporary(); - code.AddLine("uint {};", carry); - code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), - VisitOperand(operation, 1).AsUint(), carry); - return {fmt::format("({} != 0)", carry), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = GetPredicate(index); - } else if (const auto flag = std::get_if(&*dest)) { - target = GetInternalFlag(flag->GetFlag()); - } - - code.AddLine("{} = {};", target, Visit(src).AsBool()); - return {}; - } - - Expression LogicalAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalOr(Operation operation) { - return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalXor(Operation operation) { - return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); - } - - Expression 
LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool); - } - - Expression LogicalPick2(Operation operation) { - return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), - VisitOperand(operation, 1).AsUint()), - Type::Bool}; - } - - Expression LogicalAnd2(Operation operation) { - return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); - } - - template - Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { - Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat); - if constexpr (!with_nan) { - return comparison; - } - return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), - VisitOperand(operation, 0).AsHalfFloat(), - VisitOperand(operation, 1).AsHalfFloat()), - Type::Bool2}; - } - - template - Expression Logical2HLessThan(Operation operation) { - return GenerateHalfComparison(operation, "lessThan"); - } - - template - Expression Logical2HEqual(Operation operation) { - return GenerateHalfComparison(operation, "equal"); - } - - template - Expression Logical2HLessEqual(Operation operation) { - return GenerateHalfComparison(operation, "lessThanEqual"); - } - - template - Expression Logical2HGreaterThan(Operation operation) { - return GenerateHalfComparison(operation, "greaterThan"); - } - - template - Expression Logical2HNotEqual(Operation operation) { - return GenerateHalfComparison(operation, "notEqual"); - } - - template - Expression Logical2HGreaterEqual(Operation operation) { - return GenerateHalfComparison(operation, "greaterThanEqual"); - } - - Expression Texture(Operation operation) { - const auto meta = std::get(operation.GetMeta()); - const bool separate_dc = meta.sampler.type == TextureType::TextureCube && - meta.sampler.is_array && meta.sampler.is_shadow; - // TODO: Replace this with an array and make GenerateTexture use C++20 std::span - const std::vector extras{ - 
TextureOffset{}, - TextureArgument{Type::Float, meta.bias}, - }; - std::string expr = GenerateTexture(operation, "", extras, separate_dc); - if (meta.sampler.is_shadow) { - expr = fmt::format("vec4({})", expr); - } - return {expr + GetSwizzle(meta.element), Type::Float}; - } - - Expression TextureLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - std::string expr{}; - - if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube)) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); - expr = GenerateTexture(operation, "Lod", {}); - } else { - expr = GenerateTexture(operation, "Lod", - {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); - } - - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; - } - return {expr + GetSwizzle(meta->element), Type::Float}; - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const auto type = meta.sampler.is_shadow ? 
Type::Float : Type::Int; - const bool separate_dc = meta.sampler.is_shadow; - - std::vector ir_; - if (meta.sampler.is_shadow) { - ir_ = {TextureOffset{}}; - } else { - ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; - } - return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), - Type::Float}; - } - - Expression TextureQueryDimensions(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0).AsInt(); - - switch (meta->element) { - case 0: - case 1: - return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), - Type::Int}; - case 3: - return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - if (meta->element < 2) { - return {fmt::format("int(({} * vec2(256)){})", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)), - Type::Int}; - } - return {"0", Type::Int}; - } - - Expression TexelFetch(Operation operation) { - constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - UNIMPLEMENTED_IF(meta->sampler.is_array); - const std::size_t count = operation.GetOperandsCount(); - - std::string expr = "texelFetch("; - expr += GetSampler(meta->sampler); - expr += ", "; - - expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 
1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - if (i > 0) { - expr += ", "; - } - expr += VisitOperand(operation, i).AsInt(); - } - if (meta->array) { - expr += ", "; - expr += Visit(meta->array).AsInt(); - } - expr += ')'; - - if (meta->lod && !meta->sampler.is_buffer) { - expr += ", "; - expr += Visit(meta->lod).AsInt(); - } - expr += ')'; - expr += GetSwizzle(meta->element); - - return {std::move(expr), Type::Float}; - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::string expr = - GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); - return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; - } - - Expression ImageLoad(Operation operation) { - if (!device.HasImageLoadFormatted()) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); - return {"0", Type::Int}; - } - - const auto& meta{std::get(operation.GetMeta())}; - return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), - BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), - Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), - BuildIntegerCoordinates(operation), BuildImageValues(operation)); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), - BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), - Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); - return {}; - } - return {fmt::format("atomic{}({}, {})", opname, 
Visit(operation[0]).GetCode(), - Visit(operation[1]).AsUint()), - Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - code.AddLine("{};", Atomic(operation).GetCode()); - return {}; - } - - Expression Branch(Operation operation) { - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); - code.AddLine("break;"); - return {}; - } - - Expression BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - - code.AddLine("jmp_to = {};", op_a); - code.AddLine("break;"); - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), - target->GetValue()); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); - code.AddLine("break;"); - return {}; - } - - void PreExit() { - if (stage != ShaderType::Fragment) { - return; - } - const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> Expression { - // TODO(Rodrigo): Replace with contains once C++20 releases - if (used_registers.find(reg) != used_registers.end()) { - return {GetRegister(reg), Type::Float}; - } - return {"0.0f", Type::Float}; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. 
- u32 current_reg = 0; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), - SafeGetRegister(current_reg).AsFloat()); - ++current_reg; - } - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and current_reg - // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); - } - } - - Expression Exit(Operation operation) { - PreExit(); - code.AddLine("return;"); - return {}; - } - - Expression Discard(Operation operation) { - // Enclose "discard" in a conditional, so that GLSL compilation does not complain - // about unexecuted instructions that may follow this. 
- code.AddLine("if (true) {{"); - ++code.scope; - code.AddLine("discard;"); - --code.scope; - code.AddLine("}}"); - return {}; - } - - Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EmitVertex is expected to be used in a geometry shader."); - code.AddLine("EmitVertex();"); - return {}; - } - - Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); - return {}; - } - - Expression InvocationId(Operation operation) { - return {"gl_InvocationID", Type::Int}; - } - - Expression YNegate(Operation operation) { - // Y_NEGATE is mapped to this uniform value - return {"gl_FrontMaterial.ambient.a", Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; - } - - template - Expression WorkGroupId(Operation) { - return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub on non-Nvidia devices by simulating all threads voting the same as the active - // one. - return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; - } - return {fmt::format("ballotThreadNV({})", value), Type::Uint}; - } - - Expression Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub with a warp size of one. 
- return {value, Type::Bool}; - } - return {fmt::format("{}({})", func, value), Type::Bool}; - } - - Expression VoteAll(Operation operation) { - return Vote(operation, "allThreadsNV"); - } - - Expression VoteAny(Operation operation) { - return Vote(operation, "anyThreadNV"); - } - - Expression VoteEqual(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // We must return true here since a stub for a theoretical warp size of 1. - // This will always return an equal result across all votes. - return {"true", Type::Bool}; - } - return Vote(operation, "allThreadsEqualNV"); - } - - Expression ThreadId(Operation operation) { - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {"0U", Type::Uint}; - } - return {"gl_SubGroupInvocationARB", Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - if (device.HasWarpIntrinsics()) { - return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; - } - if (device.HasShaderBallot()) { - return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; - } - LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); - return {"0U", Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - std::string value = VisitOperand(operation, 0).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {std::move(value), Type::Float}; - } - - const std::string index = VisitOperand(operation, 1).AsUint(); - return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); - return {}; - } - code.AddLine("barrier();"); - return {}; - } - - Expression 
MemoryBarrierGroup(Operation) { - code.AddLine("groupMemoryBarrier();"); - return {}; - } - - Expression MemoryBarrierGlobal(Operation) { - code.AddLine("memoryBarrier();"); - return {}; - } - - struct Func final { - Func() = delete; - ~Func() = delete; - - static constexpr std::string_view LessThan = "<"; - static constexpr std::string_view Equal = "=="; - static constexpr std::string_view LessEqual = "<="; - static constexpr std::string_view GreaterThan = ">"; - static constexpr std::string_view NotEqual = "!="; - static constexpr std::string_view GreaterEqual = ">="; - - static constexpr std::string_view Eq = "Eq"; - static constexpr std::string_view Ge = "Ge"; - static constexpr std::string_view Gt = "Gt"; - static constexpr std::string_view Le = "Le"; - static constexpr std::string_view Lt = "Lt"; - - static constexpr std::string_view Add = "Add"; - static constexpr std::string_view Min = "Min"; - static constexpr std::string_view Max = "Max"; - static constexpr std::string_view And = "And"; - static constexpr std::string_view Or = "Or"; - static constexpr std::string_view Xor = "Xor"; - static constexpr std::string_view Exchange = "Exchange"; - }; - - static constexpr std::array operation_decompilers = { - &GLSLDecompiler::Assign, - - &GLSLDecompiler::Select, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::FClamp, - &GLSLDecompiler::FCastHalf0, - &GLSLDecompiler::FCastHalf1, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::FCos, - &GLSLDecompiler::FSin, - &GLSLDecompiler::FExp2, - &GLSLDecompiler::FLog2, - &GLSLDecompiler::FInverseSqrt, - &GLSLDecompiler::FSqrt, - &GLSLDecompiler::FRoundEven, - &GLSLDecompiler::FFloor, - &GLSLDecompiler::FCeil, - &GLSLDecompiler::FTrunc, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FSwizzleAdd, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - 
&GLSLDecompiler::Div, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - - &GLSLDecompiler::ICastFloat, - &GLSLDecompiler::ICastUnsigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::ILogicalShiftRight, - &GLSLDecompiler::IArithmeticShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::UCastFloat, - &GLSLDecompiler::UCastSigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::HNegate, - &GLSLDecompiler::HClamp, - &GLSLDecompiler::HCastFloat, - &GLSLDecompiler::HUnpack, - &GLSLDecompiler::HMergeF32, - &GLSLDecompiler::HMergeH0, - &GLSLDecompiler::HMergeH1, - &GLSLDecompiler::HPack2, - - &GLSLDecompiler::LogicalAssign, - &GLSLDecompiler::LogicalAnd, - &GLSLDecompiler::LogicalOr, - &GLSLDecompiler::LogicalXor, - &GLSLDecompiler::LogicalNegate, - &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAnd2, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::FOrdered, - &GLSLDecompiler::FUnordered, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - 
&GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::LogicalAddCarry, - - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - - &GLSLDecompiler::Texture, - &GLSLDecompiler::TextureLod, - &GLSLDecompiler::TextureGather, - &GLSLDecompiler::TextureQueryDimensions, - &GLSLDecompiler::TextureQueryLod, - &GLSLDecompiler::TexelFetch, - &GLSLDecompiler::TextureGradient, - - &GLSLDecompiler::ImageLoad, - &GLSLDecompiler::ImageStore, - - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - 
&GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - &GLSLDecompiler::Branch, - &GLSLDecompiler::BranchIndirect, - &GLSLDecompiler::PushFlowStack, - &GLSLDecompiler::PopFlowStack, - &GLSLDecompiler::Exit, - &GLSLDecompiler::Discard, - - &GLSLDecompiler::EmitVertex, - &GLSLDecompiler::EndPrimitive, - - &GLSLDecompiler::InvocationId, - &GLSLDecompiler::YNegate, - &GLSLDecompiler::LocalInvocationId<0>, - &GLSLDecompiler::LocalInvocationId<1>, - &GLSLDecompiler::LocalInvocationId<2>, - &GLSLDecompiler::WorkGroupId<0>, - &GLSLDecompiler::WorkGroupId<1>, - &GLSLDecompiler::WorkGroupId<2>, - - &GLSLDecompiler::BallotThread, - &GLSLDecompiler::VoteAll, - &GLSLDecompiler::VoteAny, - &GLSLDecompiler::VoteEqual, - - &GLSLDecompiler::ThreadId, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ShuffleIndexed, - - &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGroup, - &GLSLDecompiler::MemoryBarrierGlobal, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - std::string GetRegister(u32 index) const { - return AppendSuffix(index, "gpr"); - } - - std::string GetCustomVariable(u32 index) const { - return AppendSuffix(index, "custom_var"); - } - - std::string GetPredicate(Tegra::Shader::Pred pred) const { - return AppendSuffix(static_cast(pred), "pred"); - } - - std::string GetGenericInputAttribute(Attribute::Index attribute) const { - return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); - } - - std::unordered_map varying_description; - - std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { - const u8 offset = static_cast(GetGenericAttributeIndex(attribute) * 4 + element); - const auto& description = varying_description.at(offset); - 
if (description.is_scalar) { - return description.name; - } - return fmt::format("{}[{}]", description.name, element - description.first_element); - } - - std::string GetConstBuffer(u32 index) const { - return AppendSuffix(index, "cbuf"); - } - - std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); - } - - std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, - suffix); - } - - std::string GetConstBufferBlock(u32 index) const { - return AppendSuffix(index, "cbuf_block"); - } - - std::string GetLocalMemory() const { - if (suffix.empty()) { - return "lmem"; - } else { - return "lmem_" + std::string{suffix}; - } - } - - std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", - "overflow_flag"}; - const auto index = static_cast(flag); - ASSERT(index < static_cast(InternalFlag::Amount)); - - if (suffix.empty()) { - return InternalFlagNames[index]; - } else { - return fmt::format("{}_{}", InternalFlagNames[index], suffix); - } - } - - std::string GetSampler(const SamplerEntry& sampler) const { - return AppendSuffix(sampler.index, "sampler"); - } - - std::string GetImage(const ImageEntry& image) const { - return AppendSuffix(image.index, "image"); - } - - std::string AppendSuffix(u32 index, std::string_view name) const { - if (suffix.empty()) { - return fmt::format("{}{}", name, index); - } else { - return fmt::format("{}{}_{}", name, index, suffix); - } - } - - u32 GetNumPhysicalInputAttributes() const { - return stage == ShaderType::Vertex ? 
GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); - } - - u32 GetNumPhysicalAttributes() const { - return std::min(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); - } - - u32 GetNumPhysicalVaryings() const { - return std::min(device.GetMaxVaryings(), Maxwell::NumVaryings); - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - const std::string_view identifier; - const std::string_view suffix; - const Header header; - std::unordered_map transform_feedback; - - ShaderWriter code; - - std::optional max_input_vertices; -}; - -std::string GetFlowVariable(u32 index) { - return fmt::format("flow_var{}", index); -} - -class ExprDecompiler { -public: - explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ExprAnd& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += '!'; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - inner += decomp.GetPredicate(pred); - } - - void operator()(const ExprCondCode& expr) { - inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); - } - - void operator()(const ExprVar& expr) { - inner += GetFlowVariable(expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? 
"true" : "false"; - } - - void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - GLSLDecompiler& decomp; - std::string inner; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()(const ASTIfElse& ast) { - decomp.code.AddLine("else {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - decomp.code.AddLine("// Label_{}:", ast.index); - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("do {{"); - decomp.code.scope++; - ASTNode current = 
ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}} while({});", expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - if (ast.kills) { - decomp.code.AddLine("discard;"); - } else { - decomp.PreExit(); - decomp.code.AddLine("return;"); - } - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void operator()(const ASTBreak& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - decomp.code.AddLine("break;"); - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - GLSLDecompiler& decomp; -}; - -void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - code.AddLine("bool {} = false;", GetFlowVariable(i)); - } - - ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); -} - -} // Anonymous namespace - -ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, - 
usage.is_written); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); - } - const auto clip_distances = ir.GetClipDistances(); - for (std::size_t i = 0; i < std::size(clip_distances); ++i) { - entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.shader_length = ir.GetLength(); - return entries; -} - -std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); - decompiler.Decompile(); - return decompiler.GetResult(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) - : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} - - u32 GetIndex() const { - return index; - } - -private: - u32 index = 0; -}; - -struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, - bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ - is_written_} {} - - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - bool is_read = false; - bool is_written = false; -}; - -struct ShaderEntries { - std::vector const_buffers; - std::vector global_memory_entries; - std::vector samplers; - std::vector images; - std::size_t shader_length{}; - u32 clip_distances{}; - u32 enabled_uniform_buffers{}; -}; - -ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage); - -std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier, - std::string_view suffix = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null @@ 
-1,482 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/fs/file.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/logging/log.h" -#include "common/scm_rev.h" -#include "common/settings.h" -#include "common/zstd_compression.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" - -namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::BindlessSamplerMap; -using VideoCommon::Shader::BoundSamplerMap; -using VideoCommon::Shader::KeyMap; -using VideoCommon::Shader::SeparateSamplerKey; -using ShaderCacheVersionHash = std::array; - -struct ConstBufferKey { - u32 cbuf = 0; - u32 offset = 0; - u32 value = 0; -}; - -struct BoundSamplerEntry { - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct SeparateSamplerEntry { - u32 cbuf1 = 0; - u32 cbuf2 = 0; - u32 offset1 = 0; - u32 offset2 = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerEntry { - u32 cbuf = 0; - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -namespace { - -constexpr u32 NativeVersion = 21; - -ShaderCacheVersionHash GetShaderCacheVersionHash() { - ShaderCacheVersionHash hash{}; - const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); - std::memcpy(hash.data(), Common::g_shader_cache_version, length); - return hash; -} - -} // Anonymous namespace - -ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; - -ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; - -bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { - if (!file.ReadObject(type)) { - return false; - } - 
u32 code_size; - u32 code_size_b; - if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { - return false; - } - code.resize(code_size); - code_b.resize(code_size_b); - if (file.Read(code) != code_size) { - return false; - } - if (HasProgramA() && file.Read(code_b) != code_size_b) { - return false; - } - - u8 is_texture_handler_size_known; - u32 texture_handler_size_value; - u32 num_keys; - u32 num_bound_samplers; - u32 num_separate_samplers; - u32 num_bindless_samplers; - if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || - !file.ReadObject(is_texture_handler_size_known) || - !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || - !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || - !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || - !file.ReadObject(num_bindless_samplers)) { - return false; - } - if (is_texture_handler_size_known) { - texture_handler_size = texture_handler_size_value; - } - - std::vector flat_keys(num_keys); - std::vector flat_bound_samplers(num_bound_samplers); - std::vector flat_separate_samplers(num_separate_samplers); - std::vector flat_bindless_samplers(num_bindless_samplers); - if (file.Read(flat_keys) != flat_keys.size() || - file.Read(flat_bound_samplers) != flat_bound_samplers.size() || - file.Read(flat_separate_samplers) != flat_separate_samplers.size() || - file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { - return false; - } - for (const auto& entry : flat_keys) { - keys.insert({{entry.cbuf, entry.offset}, entry.value}); - } - for (const auto& entry : flat_bound_samplers) { - bound_samplers.emplace(entry.offset, entry.sampler); - } - for (const auto& entry : flat_separate_samplers) { - SeparateSamplerKey key; - key.buffers = {entry.cbuf1, entry.cbuf2}; - key.offsets = {entry.offset1, entry.offset2}; - separate_samplers.emplace(key, entry.sampler); - } - for (const auto& entry : flat_bindless_samplers) { 
- bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); - } - - return true; -} - -bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { - if (!file.WriteObject(static_cast(type)) || - !file.WriteObject(static_cast(code.size())) || - !file.WriteObject(static_cast(code_b.size()))) { - return false; - } - if (file.Write(code) != code.size()) { - return false; - } - if (HasProgramA() && file.Write(code_b) != code_b.size()) { - return false; - } - - if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || - !file.WriteObject(static_cast(texture_handler_size.has_value())) || - !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || - !file.WriteObject(compute_info) || !file.WriteObject(static_cast(keys.size())) || - !file.WriteObject(static_cast(bound_samplers.size())) || - !file.WriteObject(static_cast(separate_samplers.size())) || - !file.WriteObject(static_cast(bindless_samplers.size()))) { - return false; - } - - std::vector flat_keys; - flat_keys.reserve(keys.size()); - for (const auto& [address, value] : keys) { - flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); - } - - std::vector flat_bound_samplers; - flat_bound_samplers.reserve(bound_samplers.size()); - for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); - } - - std::vector flat_separate_samplers; - flat_separate_samplers.reserve(separate_samplers.size()); - for (const auto& [key, sampler] : separate_samplers) { - SeparateSamplerEntry entry; - std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; - std::tie(entry.offset1, entry.offset2) = key.offsets; - entry.sampler = sampler; - flat_separate_samplers.push_back(entry); - } - - std::vector flat_bindless_samplers; - flat_bindless_samplers.reserve(bindless_samplers.size()); - for (const auto& [address, sampler] : bindless_samplers) { - flat_bindless_samplers.push_back( - 
BindlessSamplerEntry{address.first, address.second, sampler}); - } - - return file.Write(flat_keys) == flat_keys.size() && - file.Write(flat_bound_samplers) == flat_bound_samplers.size() && - file.Write(flat_separate_samplers) == flat_separate_samplers.size() && - file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); -} - -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; - -ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; - -void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { - title_id = title_id_; -} - -std::optional> ShaderDiskCacheOpenGL::LoadTransferable() { - // Skip games without title id - const bool has_title_id = title_id != 0; - if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return std::nullopt; - } - - Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache found"); - is_usable = true; - return std::nullopt; - } - - u32 version{}; - if (!file.ReadObject(version)) { - LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return std::nullopt; - } - - if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); - file.Close(); - InvalidateTransferable(); - is_usable = true; - return std::nullopt; - } - if (version > NativeVersion) { - LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator, skipping"); - return std::nullopt; - } - - // Version is valid, load the shaders - std::vector entries; - while (static_cast(file.Tell()) < file.GetSize()) { - ShaderDiskCacheEntry& entry = entries.emplace_back(); - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return std::nullopt; - } - } - - is_usable = true; - return {std::move(entries)}; -} - -std::vector 
ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!is_usable) { - return {}; - } - - Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); - return {}; - } - - if (const auto result = LoadPrecompiledFile(file)) { - return *result; - } - - LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); - file.Close(); - InvalidatePrecompiled(); - return {}; -} - -std::optional> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - Common::FS::IOFile& file) { - // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector compressed(file.GetSize()); - if (file.Read(compressed) != file.GetSize()) { - return std::nullopt; - } - const std::vector decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); - precompiled_cache_virtual_file_offset = 0; - - ShaderCacheVersionHash file_hash{}; - if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - if (GetShaderCacheVersionHash() != file_hash) { - LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - - std::vector entries; - while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 binary_size; - auto& entry = entries.emplace_back(); - if (!LoadObjectFromPrecompiled(entry.unique_identifier) || - !LoadObjectFromPrecompiled(entry.binary_format) || - !LoadObjectFromPrecompiled(binary_size)) { - return std::nullopt; - } - - entry.binary.resize(binary_size); - if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return std::nullopt; - } - } - return entries; -} - -void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if 
(!Common::FS::RemoveFile(GetTransferablePath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", - Common::FS::PathToUTF8String(GetTransferablePath())); - } - InvalidatePrecompiled(); -} - -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file - precompiled_cache_virtual_file.Resize(0); - - if (!Common::FS::RemoveFile(GetPrecompiledPath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", - Common::FS::PathToUTF8String(GetPrecompiledPath())); - } -} - -void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { - if (!is_usable) { - return; - } - - const u64 id = entry.unique_identifier; - if (stored_transferable.contains(id)) { - // The shader already exists - return; - } - - Common::FS::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) { - return; - } - if (!entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - return; - } - - stored_transferable.insert(id); -} - -void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { - if (!is_usable) { - return; - } - - // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header - // when writing the dump. This should be done the moment I get access to write to the virtual - // file. 
- if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - GLint binary_length; - glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - - GLenum binary_format; - std::vector binary(binary_length); - glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - - if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || - !SaveObjectToPrecompiled(static_cast(binary.size())) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - unique_identifier); - InvalidatePrecompiled(); - } -} - -Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) { - return {}; - } - - const auto transferable_path{GetTransferablePath()}; - const bool existed = Common::FS::Exists(transferable_path); - - Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - if (!existed || file.GetSize() == 0) { - // If the file didn't exist, write its version - if (!file.WriteObject(NativeVersion)) { - LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - } - return file; -} - -void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { - const auto hash{GetShaderCacheVersionHash()}; - if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { - LOG_ERROR( - Render_OpenGL, - "Failed to write precompiled cache version hash to virtual precompiled cache file"); - } -} - -void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { - precompiled_cache_virtual_file_offset = 0; - const std::vector 
uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector compressed = - Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); - - const auto precompiled_path = GetPrecompiledPath(); - Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, - Common::FS::FileType::BinaryFile}; - - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - return; - } - if (file.Write(compressed) != compressed.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - } -} - -bool ShaderDiskCacheOpenGL::EnsureDirectories() const { - const auto CreateDir = [](const std::filesystem::path& dir) { - if (!Common::FS::CreateDir(dir)) { - LOG_ERROR(Render_OpenGL, "Failed to create directory={}", - Common::FS::PathToUTF8String(dir)); - return false; - } - return true; - }; - - return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && - CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { - return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { - return GetBaseDir() / "transferable"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { - return GetBaseDir() / "precompiled"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { - return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; -} - -std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", title_id); -} - -} // 
namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/file_sys/vfs_vector.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace Common::FS { -class IOFile; -} - -namespace OpenGL { - -using ProgramCode = std::vector; - -/// Describes a shader and how it's used by the guest GPU -struct ShaderDiskCacheEntry { - ShaderDiskCacheEntry(); - ~ShaderDiskCacheEntry(); - - bool Load(Common::FS::IOFile& file); - - bool Save(Common::FS::IOFile& file) const; - - bool HasProgramA() const { - return !code.empty() && !code_b.empty(); - } - - Tegra::Engines::ShaderType type{}; - ProgramCode code; - ProgramCode code_b; - - u64 unique_identifier = 0; - std::optional texture_handler_size; - u32 bound_buffer = 0; - VideoCommon::Shader::GraphicsInfo graphics_info; - VideoCommon::Shader::ComputeInfo compute_info; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::SeparateSamplerMap separate_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; -}; - -/// Contains an OpenGL dumped binary program -struct ShaderDiskCachePrecompiled { - u64 unique_identifier = 0; - GLenum binary_format = 0; - std::vector binary; -}; - -class ShaderDiskCacheOpenGL { -public: - explicit ShaderDiskCacheOpenGL(); - ~ShaderDiskCacheOpenGL(); - - /// Binds a title ID for all future operations. 
- void BindTitleID(u64 title_id); - - /// Loads transferable cache. If file has a old version or on failure, it deletes the file. - std::optional> LoadTransferable(); - - /// Loads current game's precompiled cache. Invalidates on failure. - std::vector LoadPrecompiled(); - - /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable(); - - /// Removes the precompiled cache file and clears virtual precompiled cache file. - void InvalidatePrecompiled(); - - /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveEntry(const ShaderDiskCacheEntry& entry); - - /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SavePrecompiled(u64 unique_identifier, GLuint program); - - /// Serializes virtual precompiled shader cache file to real file - void SaveVirtualPrecompiledFile(); - -private: - /// Loads the transferable cache. Returns empty on failure. - std::optional> LoadPrecompiledFile( - Common::FS::IOFile& file); - - /// Opens current game's transferable file and write it's header if it doesn't exist - Common::FS::IOFile AppendTransferableFile() const; - - /// Save precompiled header to precompiled_cache_in_memory - void SavePrecompiledHeaderToVirtualPrecompiledCache(); - - /// Create shader disk cache directories. Returns true on success. 
- bool EnsureDirectories() const; - - /// Gets current game's transferable file path - std::filesystem::path GetTransferablePath() const; - - /// Gets current game's precompiled file path - std::filesystem::path GetPrecompiledPath() const; - - /// Get user's transferable directory path - std::filesystem::path GetTransferableDir() const; - - /// Get user's precompiled directory path - std::filesystem::path GetPrecompiledDir() const; - - /// Get user's shader directory path - std::filesystem::path GetBaseDir() const; - - /// Get current game's title id - std::string GetTitleID() const; - - template - bool SaveArrayToPrecompiled(const T* data, std::size_t length) { - const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += write_length; - return write_length == sizeof(T) * length; - } - - template - bool LoadArrayFromPrecompiled(T* data, std::size_t length) { - const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += read_length; - return read_length == sizeof(T) * length; - } - - template - bool SaveObjectToPrecompiled(const T& object) { - return SaveArrayToPrecompiled(&object, 1); - } - - bool SaveObjectToPrecompiled(bool object) { - const auto value = static_cast(object); - return SaveArrayToPrecompiled(&value, 1); - } - - template - bool LoadObjectFromPrecompiled(T& object) { - return LoadArrayFromPrecompiled(&object, 1); - } - - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file - FileSys::VectorVfsFile precompiled_cache_virtual_file; - // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset = 0; - - // Stored transferable shaders - std::unordered_set stored_transferable; - - /// Title ID to operate on - u64 
title_id = 0; - - // The cache has been loaded at boot - bool is_usable = false; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c0d5c7f4 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } - } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..7a3660496 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -8,146 +8,14 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_) - : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, - descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader_.code)}, 
pipeline{CreatePipeline()} {} +ComputePipeline::ComputePipeline() = default; -VKComputePipeline::~VKComputePipeline() = default; - -VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector bindings; - u32 binding = 0; - const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { - // TODO(Rodrigo): Maybe make individual bindings here? - for (u32 bindpoint = 0; bindpoint < static_cast(num_entries); ++bindpoint) { - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - } - }; - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - return device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - 
.pPushConstantRanges = nullptr, - }); -} - -vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. - return {}; - } - - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }); -} - -vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector& code) const { - device.SaveShader(code); - - return device.GetLogical().CreateShaderModule({ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = code.size() * sizeof(u32), - .pCode = code.data(), - }); -} - -vk::Pipeline VKComputePipeline::CreatePipeline() const { - - VkComputePipelineCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *shader_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *layout, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - 
.requiredSubgroupSize = GuestWarpSize, - }; - - if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - ci.stage.pNext = &subgroup_size_ci; - } - - return device.GetLogical().CreateComputePipeline(ci); -} +ComputePipeline::~ComputePipeline() = default; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..433d8bb3d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,7 +6,6 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -15,50 +14,10 @@ class Device; class VKScheduler; class VKUpdateDescriptorQueue; -class VKComputePipeline final { +class ComputePipeline { public: - explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_); - ~VKComputePipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - - vk::ShaderModule CreateShaderModule(const std::vector& code) const; - - vk::Pipeline CreatePipeline() const; - - const Device& device; - VKScheduler& scheduler; - ShaderEntries entries; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; 
- vk::DescriptorUpdateTemplateKHR descriptor_template; - vk::ShaderModule shader_module; - vk::Pipeline pipeline; + explicit ComputePipeline(); + ~ComputePipeline(); }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp deleted file mode 100644 index fc6dd83eb..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ /dev/null @@ -1,484 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -MICROPROFILE_DECLARE(Vulkan_PipelineCache); - -namespace { - -template -VkStencilOpState GetStencilFaceState(const StencilFace& face) { - return { - .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), - .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), - .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), - .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), - .compareMask = 0, - .writeMask = 0, - .reference = 0, - }; -} - -bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { - static constexpr std::array unsupported_topologies = { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, - 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; - return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), - topology) == std::end(unsupported_topologies); -} - -VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { - union Swizzle { - u32 raw; - BitField<0, 3, Maxwell::ViewportSwizzle> x; - BitField<4, 3, Maxwell::ViewportSwizzle> y; - BitField<8, 3, Maxwell::ViewportSwizzle> z; - BitField<12, 3, Maxwell::ViewportSwizzle> w; - }; - const Swizzle unpacked{swizzle}; - - return { - .x = MaxwellToVK::ViewportSwizzle(unpacked.x), - .y = MaxwellToVK::ViewportSwizzle(unpacked.y), - .z = MaxwellToVK::ViewportSwizzle(unpacked.z), - .w = MaxwellToVK::ViewportSwizzle(unpacked.w), - }; -} - -VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { - switch (msaa_mode) { - case Tegra::Texture::MsaaMode::Msaa1x1: - return VK_SAMPLE_COUNT_1_BIT; - case Tegra::Texture::MsaaMode::Msaa2x1: - case Tegra::Texture::MsaaMode::Msaa2x1_D3D: - return VK_SAMPLE_COUNT_2_BIT; - case Tegra::Texture::MsaaMode::Msaa2x2: - case Tegra::Texture::MsaaMode::Msaa2x2_VC4: - case Tegra::Texture::MsaaMode::Msaa2x2_VC12: - return VK_SAMPLE_COUNT_4_BIT; - case Tegra::Texture::MsaaMode::Msaa4x2: - case Tegra::Texture::MsaaMode::Msaa4x2_D3D: - case Tegra::Texture::MsaaMode::Msaa4x2_VC8: - case Tegra::Texture::MsaaMode::Msaa4x2_VC24: - return VK_SAMPLE_COUNT_8_BIT; - case Tegra::Texture::MsaaMode::Msaa4x4: - return VK_SAMPLE_COUNT_16_BIT; - default: - UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); - return VK_SAMPLE_COUNT_1_BIT; - } -} - -} // Anonymous namespace - -VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers) - : device{device_}, scheduler{scheduler_}, 
cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, - modules(CreateShaderModules(program)), - pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} - -VKGraphicsPipeline::~VKGraphicsPipeline() = default; - -VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - vk::Span bindings) const { - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = bindings.size(), - .pBindings = bindings.data(), - }; - return device.GetLogical().CreateDescriptorSetLayout(ci); -} - -vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - return device.GetLogical().CreatePipelineLayout(ci); -} - -vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - for (const auto& stage : program) { - if (stage) { - FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); - } - } - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. 
- return {}; - } - - const VkDescriptorUpdateTemplateCreateInfoKHR ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); -} - -std::vector VKGraphicsPipeline::CreateShaderModules( - const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = 0, - .pCode = nullptr, - }; - - std::vector shader_modules; - shader_modules.reserve(Maxwell::MaxShaderStage); - for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { - const auto& stage = program[i]; - if (!stage) { - continue; - } - - device.SaveShader(stage->code); - - ci.codeSize = stage->code.size() * sizeof(u32); - ci.pCode = stage->code.data(); - shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); - } - return shader_modules; -} - -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, - VkRenderPass renderpass, - u32 num_color_buffers) const { - const auto& state = cache_key.fixed_state; - const auto& viewport_swizzles = state.viewport_swizzles; - - FixedPipelineState::DynamicState dynamic; - if (device.IsExtExtendedDynamicStateSupported()) { - // Insert dummy values, as long as they are valid they don't matter as extended dynamic - // state is ignored - dynamic.raw1 = 0; - dynamic.raw2 = 0; - dynamic.vertex_strides.fill(0); - } else { - dynamic = state.dynamic_state; - } - - std::vector vertex_bindings; - std::vector vertex_binding_divisors; - for (std::size_t index = 0; index < 
Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast(index), - .divisor = state.binding_divisors[index], - }); - } - } - - std::vector vertex_attributes; - const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; - if (!attribute.enabled) { - continue; - } - if (!input_attributes.contains(static_cast(index))) { - // Skip attributes not used by the vertex shaders. - continue; - } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); - } - - VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), - .pVertexBindingDescriptions = vertex_bindings.data(), - .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), - .pVertexAttributeDescriptions = vertex_attributes.data(), - }; - - const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), - .pVertexBindingDivisors = vertex_binding_divisors.data(), - }; - if (!vertex_binding_divisors.empty()) { - vertex_input_ci.pNext = &input_divisor_ci; - } - - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, 
state.topology); - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), - .primitiveRestartEnable = state.primitive_restart_enable != 0 && - SupportsPrimitiveRestart(input_assembly_topology), - }; - - const VkPipelineTessellationStateCreateInfo tessellation_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, - }; - - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; - - std::array swizzles; - std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewportSwizzles = swizzles.data(), - }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), - .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = static_cast( - dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), - .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisample_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthTestEnable = dynamic.depth_test_enable, - .depthWriteEnable = dynamic.depth_write_enable, - .depthCompareOp = dynamic.depth_test_enable - ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) - : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, - .stencilTestEnable = dynamic.stencil_enable, - .front = GetStencilFaceState(dynamic.front), - .back = GetStencilFaceState(dynamic.back), - .minDepthBounds = 0.0f, - .maxDepthBounds = 0.0f, - }; - - std::array cb_attachments; - for (std::size_t index = 0; index < num_color_buffers; ++index) { - static constexpr std::array COMPONENT_TABLE{ - VK_COLOR_COMPONENT_R_BIT, - VK_COLOR_COMPONENT_G_BIT, - VK_COLOR_COMPONENT_B_BIT, - VK_COLOR_COMPONENT_A_BIT, - }; - const auto& blend = state.attachments[index]; - - VkColorComponentFlags color_components = 0; - for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { - if (blend.Mask()[i]) { - color_components |= COMPONENT_TABLE[i]; - } - } - - cb_attachments[index] = { - .blendEnable = blend.enable != 0, - .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), - .dstColorBlendFactor = 
MaxwellToVK::BlendFactor(blend.DestRGBFactor()), - .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), - .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), - .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), - .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), - .colorWriteMask = color_components, - }; - } - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = num_color_buffers, - .pAttachments = cb_attachments.data(), - .blendConstants = {}, - }; - - std::vector dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }; - if (device.IsExtExtendedDynamicStateSupported()) { - static constexpr std::array extended{ - VK_DYNAMIC_STATE_CULL_MODE_EXT, - VK_DYNAMIC_STATE_FRONT_FACE_EXT, - VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, - VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, - VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_OP_EXT, - }; - dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); - } - - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - std::vector shader_stages; - std::size_t module_index = 0; - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - if (!program[stage]) { - continue; - } - - VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = MaxwellToVK::ShaderStage(static_cast(stage)); - stage_ci.module = *modules[module_index++]; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { - stage_ci.pNext = &subgroup_size_ci; - } - } - return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = &tessellation_ci, - .pViewportState = &viewport_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisample_ci, - .pDepthStencilState = &depth_stencil_ci, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h deleted file mode 100644 index 8b6a98fe0..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to 
the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsPipelineCacheKey { - VkRenderPass renderpass; - std::array shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class Device; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - -using SPIRVProgram = std::array, Maxwell::MaxShaderStage>; - -class VKGraphicsPipeline final { -public: - explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers); - ~VKGraphicsPipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - GraphicsPipelineCacheKey GetCacheKey() const { - return cache_key; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout( - vk::Span bindings) const; - - vk::PipelineLayout CreatePipelineLayout() const; - - 
vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const; - - std::vector CreateShaderModules(const SPIRVProgram& program) const; - - vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, - u32 num_color_buffers) const; - - const Device& device; - VKScheduler& scheduler; - const GraphicsPipelineCacheKey cache_key; - const u64 hash; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - std::vector modules; - - vk::Pipeline pipeline; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..7d0ba1180 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -19,49 +19,27 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using 
VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; namespace { - -constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; -constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; -constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - -constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - .depth = VideoCommon::Shader::CompileDepth::FullDecompile, - .disable_else_derivation = true, -}; - -constexpr std::size_t GetStageFromProgram(std::size_t program) { +size_t StageFromProgram(size_t program) { return program == 0 ? 0 : program - 1; } -constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(GetStageFromProgram(static_cast(program))); +ShaderType StageFromProgram(Maxwell::ShaderProgram program) { + return static_cast(StageFromProgram(static_cast(program))); } ShaderType GetShaderType(Maxwell::ShaderProgram program) { @@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { return ShaderType::Vertex; } } - -template -void AddBindings(std::vector& bindings, u32& binding, - VkShaderStageFlags stage_flags, const Container& container) { - const u32 num_entries = static_cast(std::size(container)); - for (std::size_t i = 0; i < num_entries; ++i) { - u32 count = 1; - if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - // Combined image samplers can be arrayed. 
- count = container[i].size; - } - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = count, - .stageFlags = stage_flags, - .pImmutableSamplers = nullptr, - }); - } -} - -u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector& bindings, - Maxwell::ShaderProgram program_type, u32 base_binding) { - const ShaderType stage = GetStageFromProgram(program_type); - const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); - - u32 binding = base_binding; - AddBindings(bindings, binding, flags, entries.const_buffers); - AddBindings(bindings, binding, flags, entries.global_buffers); - AddBindings(bindings, binding, flags, entries.uniform_texels); - AddBindings(bindings, binding, flags, entries.samplers); - AddBindings(bindings, binding, flags, entries.storage_texels); - AddBindings(bindings, binding, flags, entries.images); - return binding; -} - } // Anonymous namespace -std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); - return static_cast(hash); -} - -bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, Size()) == 0; -} - -std::size_t ComputePipelineCacheKey::Hash() const noexcept { +size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); - return static_cast(hash); + return static_cast(hash); } bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, - GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), - shader_ir(program_code, main_offset_, compiler_settings, registry), - 
entries(GenerateShaderEntries(shader_ir)) {} +Shader::Shader() = default; Shader::~Shader() = default; -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} -VKPipelineCache::~VKPipelineCache() = default; - -std::array VKPipelineCache::GetShaders() { - std::array shaders{}; - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto program{static_cast(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - - const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - if (!result) { - const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; - - // No shader found - create a new one - static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast(index == 0 ? 
0 : index - 1); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader = std::make_unique(maxwell3d, stage, gpu_addr, *cpu_addr, - std::move(code), stage_offset); - result = shader.get(); - - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, size_in_bytes); - } else { - null_shader = std::move(shader); - } - } - shaders[index] = result; - } - return last_shaders = shaders; -} - -VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders) { - MICROPROFILE_SCOPE(Vulkan_PipelineCache); - - if (last_graphics_pipeline && last_graphics_key == key) { - return last_graphics_pipeline; - } - last_graphics_key = key; - - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { - std::unique_lock lock{pipeline_cache}; - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, bindings, program, key, - num_color_buffers); - } - last_graphics_pipeline = pair->second.get(); - return last_graphics_pipeline; - } - - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - auto& entry = pair->second; - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, key, bindings, - program, num_color_buffers); - gpu.ShaderNotify().MarkShaderComplete(); - } - last_graphics_pipeline = entry.get(); - return 
last_graphics_pipeline; -} +PipelineCache::~PipelineCache() = default; -VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); @@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - - const GPUVAddr gpu_addr = key.shader; - - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); - if (!shader) { - // No shader found - create a new one - const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader_info = std::make_unique(kepler_compute, ShaderType::Compute, gpu_addr, - *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); - shader = shader_info.get(); - - if (cpu_addr) { - Register(std::move(shader_info), *cpu_addr, size_in_bytes); - } else { - null_kernel = std::move(shader_info); - } - } - - const Specialization specialization{ - .base_binding = 0, - .workgroup_size = key.workgroup_size, - .shared_memory_size = key.shared_memory_size, - .point_size = std::nullopt, - .enabled_attributes = {}, - .attribute_types = {}, - .ndc_minus_one_to_one = false, - }; - const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, - shader->GetRegistry(), specialization), - shader->GetEntries()}; - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, spirv_shader); - return *entry; -} - -void VKPipelineCache::EmplacePipeline(std::unique_ptr pipeline) { - gpu.ShaderNotify().MarkShaderComplete(); - std::unique_lock 
lock{pipeline_cache}; - graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); -} - -void VKPipelineCache::OnShaderRemoval(Shader* shader) { - bool finished = false; - const auto Finish = [&] { - // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and - // flush. - if (finished) { - return; - } - finished = true; - scheduler.Finish(); - }; - - const GPUVAddr invalidated_addr = shader->GetGpuAddr(); - for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { - auto& entry = it->first; - if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == - entry.shaders.end()) { - ++it; - continue; - } - Finish(); - it = graphics_cache.erase(it); - } - for (auto it = compute_cache.begin(); it != compute_cache.end();) { - auto& entry = it->first; - if (entry.shader != invalidated_addr) { - ++it; - continue; - } - Finish(); - it = compute_cache.erase(it); - } -} - -std::pair> -VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - Specialization specialization; - if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { - float point_size; - std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); - specialization.point_size = point_size; - ASSERT(point_size != 0.0f); - } - for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.attributes[i]; - specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; - specialization.attribute_types[i] = attribute.Type(); - } - specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; - specialization.early_fragment_tests = fixed_state.early_z; - - // Alpha test - specialization.alpha_test_func = - FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - specialization.alpha_test_ref = Common::BitCast(fixed_state.alpha_test_ref); - - SPIRVProgram program; - std::vector bindings; - - for (std::size_t index = 1; index < 
Maxwell::MaxShaderProgram; ++index) { - const auto program_enum = static_cast(index); - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - - const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 - const ShaderType program_type = GetShaderType(program_enum); - const auto& entries = shader->GetEntries(); - program[stage] = { - Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries, - }; - - const u32 old_binding = specialization.base_binding; - specialization.base_binding = - FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); - ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); - } - return {std::move(program), std::move(bindings)}; + throw "Bad"; } -template -void AddEntry(std::vector& template_entries, u32& binding, - u32& offset, const Container& container) { - static constexpr u32 entry_size = static_cast(sizeof(DescriptorUpdateEntry)); - const u32 count = static_cast(std::size(container)); - - if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { - for (u32 i = 0; i < count; ++i) { - const u32 num_samplers = container[i].size; - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = num_samplers, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - - ++binding; - offset += num_samplers * entry_size; - } - return; - } - - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || - descriptor_type == STORAGE_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple texels at once causes the driver to crash. 
- // Note: Fixed in driver Windows 443.24, Linux 440.66.15 - for (u32 i = 0; i < count; ++i) { - template_entries.push_back({ - .dstBinding = binding + i, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = descriptor_type, - .offset = static_cast(offset + i * entry_size), - .stride = entry_size, - }); - } - } else if (count > 0) { - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = count, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - } - offset += count * entry_size; - binding += count; -} - -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries) { - AddEntry(template_entries, offset, binding, entries.const_buffers); - AddEntry(template_entries, offset, binding, entries.global_buffers); - AddEntry(template_entries, offset, binding, entries.uniform_texels); - AddEntry(template_entries, offset, binding, entries.samplers); - AddEntry(template_entries, offset, binding, entries.storage_texels); - AddEntry(template_entries, offset, binding, entries.images); -} +void PipelineCache::OnShaderRemoval(Shader*) {} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..e3e63340d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -15,15 +15,8 @@ #include #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/async_shaders.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include 
"video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -35,7 +28,7 @@ namespace Vulkan { class Device; class RasterizerVulkan; -class VKComputePipeline; +class ComputePipeline; class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -47,7 +40,7 @@ struct ComputePipelineCacheKey { u32 shared_memory_size; std::array workgroup_size; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; @@ -63,16 +56,9 @@ static_assert(std::is_trivially_constructible_v); namespace std { -template <> -struct hash { - std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - template <> struct hash { - std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { return k.Hash(); } }; @@ -83,66 +69,26 @@ namespace Vulkan { class Shader { public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, - Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); + explicit Shader(); ~Shader(); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - VideoCommon::Shader::ShaderIR& GetIR() { - return shader_ir; - } - - const VideoCommon::Shader::ShaderIR& GetIR() const { - return shader_ir; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - GPUVAddr gpu_addr{}; - VideoCommon::Shader::ProgramCode program_code; - VideoCommon::Shader::Registry registry; - VideoCommon::Shader::ShaderIR shader_ir; - ShaderEntries entries; }; -class VKPipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: - 
explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); - ~VKPipelineCache() override; - - std::array GetShaders(); + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~PipelineCache() override; - VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, - u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders); - - VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); - - void EmplacePipeline(std::unique_ptr pipeline); + ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); protected: void OnShaderRemoval(Shader* shader) final; private: - std::pair> DecompileShaders( - const FixedPipelineState& fixed_state); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -158,17 +104,8 @@ private: std::array last_shaders{}; - GraphicsPipelineCacheKey last_graphics_key; - VKGraphicsPipeline* last_graphics_pipeline = nullptr; - std::mutex pipeline_cache; - std::unordered_map> - graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> compute_cache; }; -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries); - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..f152297d9 100644 --- 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -std::array GetShaderAddresses( - const std::array& shaders) { - std::array addresses; - for (size_t i = 0; i < std::size(addresses); ++i) { - addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; - } - return addresses; -} - struct TextureHandle { constexpr TextureHandle(u32 data, bool via_header_index) { const Tegra::Texture::TextureHandle handle{data}; @@ -117,98 +107,6 @@ struct TextureHandle { u32 sampler; }; -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - size_t stage, size_t index = 0) { - const auto shader_type = static_cast(stage); - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return 
TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::e2D; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, - ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { - for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const VkSampler sampler = *sampler_ptr++; - const ImageViewId image_view_id = 
*image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddSampledImage(handle, sampler); - } - } - for ([[maybe_unused]] const auto& entry : entries.storage_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.images) { - // TODO: Mark as modified - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddImage(handle); - } -} - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ @@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra descriptor_pool, update_descriptor_queue), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { + wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } } RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - MICROPROFILE_SCOPE(Vulkan_Drawing); - - SCOPE_EXIT({ gpu.TickWork(); }); - FlushWork(); - - query_cache.UpdateCounters(); - - graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - 
texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); - - const auto shaders = pipeline_cache.GetShaders(); - graphics_key.shaders = GetShaderAddresses(shaders); - - SetupShaderDescriptors(shaders, is_indexed); - - const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - graphics_key.renderpass = framebuffer->RenderPass(); - - VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( - graphics_key, framebuffer->NumColorBuffers(), async_shaders); - if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { - // Async graphics pipeline was not ready. - return; - } - - BeginTransformFeedback(); - - scheduler.RequestRenderpass(framebuffer); - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - UpdateDynamicStates(); - - const auto& regs = maxwell3d.regs; - const u32 num_instances = maxwell3d.mme_draw.instance_count; - const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); - const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); - const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); - } else { - cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, - draw_params.base_vertex, draw_params.base_instance); - } - }); - - EndTransformFeedback(); + UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); } void RasterizerVulkan::Clear() { @@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { }); } -void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { - MICROPROFILE_SCOPE(Vulkan_Compute); 
- - query_cache.UpdateCounters(); - - const auto& launch_desc = kepler_compute.launch_description; - auto& pipeline = pipeline_cache.GetComputePipeline({ - .shader = code_addr, - .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size{ - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, - }); - - // Compute dispatches can't be executed inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - image_view_indices.clear(); - sampler_handles.clear(); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - const auto& entries = pipeline.GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeUniformTexels(entries); - SetupComputeTextures(entries); - SetupComputeStorageTexels(entries); - SetupComputeImages(entries); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - update_descriptor_queue.Acquire(); - - buffer_cache.BindHostComputeBuffers(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); - - const VkPipeline pipeline_handle = pipeline.GetHandle(); - const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); - const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle, 
pipeline_layout, - descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - cmdbuf.Dispatch(grid_x, grid_y, grid_z); - }); +void RasterizerVulkan::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { @@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } -void RasterizerVulkan::SetupShaderDescriptors( - const std::array& shaders, bool is_indexed) { - image_view_indices.clear(); - sampler_handles.clear(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - const ShaderEntries& entries = shader->GetEntries(); - SetupGraphicsUniformTexels(entries, stage); - SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - update_descriptor_queue.Acquire(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; 
++stage) { - // Skip VertexA stage - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, - image_view_id_ptr, sampler_ptr); - } -} - void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d.regs; UpdateViewportsState(regs); @@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = - GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - 
image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.images) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, - COMPUTE_SHADER_INDEX, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.images) { - const 
TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..31017dc2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -28,7 +28,6 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/async_shaders.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -73,7 +72,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -103,19 +102,6 @@ public: bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - - /// Maximum supported size that a constbuffer can have in bytes. 
- static constexpr size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -125,40 +111,12 @@ private: void FlushWork(); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array& shaders, - bool is_indexed); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - /// Setup uniform texels in the graphics pipeline. - void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup textures in the graphics pipeline. - void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); - - /// Setup storage texels in the graphics pipeline. - void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup images in the graphics pipeline. - void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - - /// Setup texel buffers in the compute pipeline. - void SetupComputeUniformTexels(const ShaderEntries& entries); - - /// Setup textures in the compute pipeline. - void SetupComputeTextures(const ShaderEntries& entries); - - /// Setup storage texels in the compute pipeline. - void SetupComputeStorageTexels(const ShaderEntries& entries); - - /// Setup images in the compute pipeline. 
- void SetupComputeImages(const ShaderEntries& entries); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -198,13 +156,12 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - VKPipelineCache pipeline_cache; + PipelineCache pipeline_cache; VKQueryCache query_cache; AccelerateDMA accelerate_dma; VKFenceManager fence_manager; vk::Event wfi_event; - VideoCommon::Shader::AsyncShaders async_shaders; boost::container::static_vector image_view_indices; std::array image_view_ids; diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null @@ -1,752 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { - -ASTZipper::ASTZipper() = default; - -void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { - ASSERT(new_first->manager == nullptr); - first = new_first; - last = new_first; - - ASTNode current = first; - while (current) { - current->manager = this; - current->parent = parent; - last = current; - current = current->next; - } -} - -void ASTZipper::PushBack(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous = last; - if (last) { - last->next = new_node; - } - new_node->next.reset(); - last = new_node; - if (!first) { - first = new_node; - } - new_node->manager = this; -} - -void ASTZipper::PushFront(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous.reset(); - new_node->next = first; - if (first) { - first->previous = new_node; - } - if (last == first) { - last = new_node; - } - first = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushFront(new_node); - return; - } - const ASTNode next = at_node->next; - if (next) { - next->previous = new_node; - } - new_node->previous = at_node; - if (at_node == last) { - last = new_node; - } - new_node->next = next; - at_node->next = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushBack(new_node); - return; - } - const ASTNode previous = at_node->previous; - if (previous) { - previous->next = new_node; - } - new_node->next = at_node; - if (at_node == first) { - first = new_node; - } - new_node->previous = previous; - at_node->previous = new_node; - new_node->manager = this; 
-} - -void ASTZipper::DetachTail(ASTNode node) { - ASSERT(node->manager == this); - if (node == first) { - first.reset(); - last.reset(); - return; - } - - last = node->previous; - last->next.reset(); - node->previous.reset(); - - ASTNode current = std::move(node); - while (current) { - current->manager = nullptr; - current->parent.reset(); - current = current->next; - } -} - -void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { - ASSERT(start->manager == this && end->manager == this); - if (start == end) { - DetachSingle(start); - return; - } - const ASTNode prev = start->previous; - const ASTNode post = end->next; - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - start->previous.reset(); - end->next.reset(); - ASTNode current = start; - bool found = false; - while (current) { - current->manager = nullptr; - current->parent.reset(); - found |= current == end; - current = current->next; - } - ASSERT(found); -} - -void ASTZipper::DetachSingle(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode prev = node->previous; - const ASTNode post = node->next; - node->previous.reset(); - node->next.reset(); - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - - node->manager = nullptr; - node->parent.reset(); -} - -void ASTZipper::Remove(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode next = node->next; - const ASTNode previous = node->previous; - if (previous) { - previous->next = next; - } - if (next) { - next->previous = previous; - } - node->parent.reset(); - node->manager = nullptr; - if (node == last) { - last = previous; - } - if (node == first) { - first = next; - } -} - -class ExprPrinter final { -public: - void operator()(const ExprAnd& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " && "; - 
std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += "!"; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - inner += fmt::format("P{}", expr.predicate); - } - - void operator()(const ExprCondCode& expr) { - inner += fmt::format("CC{}", expr.cc); - } - - void operator()(const ExprVar& expr) { - inner += fmt::format("V{}", expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(const ExprGprEqual& expr) { - inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string inner; -}; - -class ASTPrinter { -public: - void operator()(const ASTProgram& ast) { - scope++; - inner += "program {\n"; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - inner += "}\n"; - scope--; - } - - void operator()(const ASTIfThen& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}}\n", Indent()); - } - - void operator()(const ASTIfElse& ast) { - inner += Indent(); - inner += "else {\n"; - - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - - inner += Indent(); - inner += "}\n"; - } - - void operator()(const ASTBlockEncoded& ast) { - inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); - } - - void 
operator()([[maybe_unused]] const ASTBlockDecoded& ast) { - inner += Indent(); - inner += "Block;\n"; - } - - void operator()(const ASTVarSet& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - inner += fmt::format("Label_{}:\n", ast.index); - } - - void operator()(const ASTGoto& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += - fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); - } - - void operator()(const ASTDoWhile& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}do {{\n", Indent()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), - ast.kills ? "discard" : "exit"); - } - - void operator()(const ASTBreak& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string_view Indent() { - if (space_segment_scope == scope) { - return space_segment; - } - - // Ensure that we don't exceed our view. 
- ASSERT(scope * 2 < spaces.size()); - - space_segment = spaces.substr(0, scope * 2); - space_segment_scope = scope; - return space_segment; - } - - std::string inner{}; - std::string_view space_segment; - - u32 scope{}; - u32 space_segment_scope{}; - - static constexpr std::string_view spaces{" "}; -}; - -std::string ASTManager::Print() const { - ASTPrinter printer{}; - printer.Visit(main_node); - return printer.GetResult(); -} - -ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) - : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} - -ASTManager::~ASTManager() { - Clear(); -} - -void ASTManager::Init() { - main_node = ASTBase::Make(ASTNode{}); - program = std::get_if(main_node->GetInnerData()); - false_condition = MakeExpr(false); -} - -void ASTManager::DeclareLabel(u32 address) { - const auto pair = labels_map.emplace(address, labels_count); - if (pair.second) { - labels_count++; - labels.resize(labels_count); - } -} - -void ASTManager::InsertLabel(u32 address) { - const u32 index = labels_map[address]; - const ASTNode label = ASTBase::Make(main_node, index); - labels[index] = label; - program->nodes.PushBack(label); -} - -void ASTManager::InsertGoto(Expr condition, u32 address) { - const u32 index = labels_map[address]; - const ASTNode goto_node = ASTBase::Make(main_node, std::move(condition), index); - gotos.push_back(goto_node); - program->nodes.PushBack(goto_node); -} - -void ASTManager::InsertBlock(u32 start_address, u32 end_address) { - ASTNode block = ASTBase::Make(main_node, start_address, end_address); - program->nodes.PushBack(std::move(block)); -} - -void ASTManager::InsertReturn(Expr condition, bool kills) { - ASTNode node = ASTBase::Make(main_node, std::move(condition), kills); - program->nodes.PushBack(std::move(node)); -} - -// The decompile algorithm is based on -// "Taming control flow: A structured approach to eliminating goto statements" -// by AM Erosa, LJ Hendren 1994. 
In general, the idea is to get gotos to be -// on the same structured level as the label which they jump to. This is done, -// through outward/inward movements and lifting. Once they are at the same -// level, you can enclose them in an "if" structure or a "do-while" structure. -void ASTManager::Decompile() { - auto it = gotos.begin(); - while (it != gotos.end()) { - const ASTNode goto_node = *it; - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - const ASTNode label = labels[*label_index]; - if (!full_decompile) { - // We only decompile backward jumps - if (!IsBackwardsJump(goto_node, label)) { - it++; - continue; - } - } - if (IndirectlyRelated(goto_node, label)) { - while (!DirectlyRelated(goto_node, label)) { - MoveOutward(goto_node); - } - } - if (DirectlyRelated(goto_node, label)) { - u32 goto_level = goto_node->GetLevel(); - const u32 label_level = label->GetLevel(); - while (label_level < goto_level) { - MoveOutward(goto_node); - goto_level--; - } - // TODO(Blinkhawk): Implement Lifting and Inward Movements - } - if (label->GetParent() == goto_node->GetParent()) { - bool is_loop = false; - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label) { - is_loop = true; - break; - } - current = current->GetPrevious(); - } - - if (is_loop) { - EncloseDoWhile(goto_node, label); - } else { - EncloseIfThen(goto_node, label); - } - it = gotos.erase(it); - continue; - } - it++; - } - if (full_decompile) { - for (const ASTNode& label : labels) { - auto& manager = label->GetManager(); - manager.Remove(label); - } - labels.clear(); - } else { - auto label_it = labels.begin(); - while (label_it != labels.end()) { - bool can_remove = true; - ASTNode label = *label_it; - for (const ASTNode& goto_node : gotos) { - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - ASTNode& glabel = labels[*label_index]; - if (glabel == label) { - can_remove = false; - break; - } - } 
- if (can_remove) { - label->MarkLabelUnused(); - } - } - } -} - -bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { - u32 goto_level = goto_node->GetLevel(); - u32 label_level = label_node->GetLevel(); - while (goto_level > label_level) { - goto_level--; - goto_node = goto_node->GetParent(); - } - while (label_level > goto_level) { - label_level--; - label_node = label_node->GetParent(); - } - while (goto_node->GetParent() != label_node->GetParent()) { - goto_node = goto_node->GetParent(); - label_node = label_node->GetParent(); - } - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label_node) { - return true; - } - current = current->GetPrevious(); - } - return false; -} - -bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { - return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); -} - -bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { - if (first->GetParent() == second->GetParent()) { - return false; - } - const u32 first_level = first->GetLevel(); - const u32 second_level = second->GetLevel(); - u32 min_level; - u32 max_level; - ASTNode max; - ASTNode min; - if (first_level > second_level) { - min_level = second_level; - min = second; - max_level = first_level; - max = first; - } else { - min_level = first_level; - min = first; - max_level = second_level; - max = second; - } - - while (max_level > min_level) { - max_level--; - max = max->GetParent(); - } - - return min->GetParent() == max->GetParent(); -} - -void ASTManager::ShowCurrentState(std::string_view state) const { - LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); - SanityCheck(); -} - -void ASTManager::SanityCheck() const { - for (const auto& label : labels) { - if (!label->GetParent()) { - LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); - } - } -} - -void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { - 
ASTZipper& zipper = goto_node->GetManager(); - const ASTNode loop_start = label->GetNext(); - if (loop_start == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode parent = label->GetParent(); - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSegment(loop_start, goto_node); - const ASTNode do_while_node = ASTBase::Make(parent, condition); - ASTZipper* sub_zipper = do_while_node->GetSubNodes(); - sub_zipper->Init(loop_start, do_while_node); - zipper.InsertAfter(do_while_node, label); - sub_zipper->Remove(goto_node); -} - -void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode if_end = label->GetPrevious(); - if (if_end == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode prev = goto_node->GetPrevious(); - const Expr condition = goto_node->GetGotoCondition(); - bool do_else = false; - if (!disable_else_derivation && prev->IsIfThen()) { - const Expr if_condition = prev->GetIfCondition(); - do_else = ExprAreEqual(if_condition, condition); - } - const ASTNode parent = label->GetParent(); - zipper.DetachSegment(goto_node, if_end); - ASTNode if_node; - if (do_else) { - if_node = ASTBase::Make(parent); - } else { - Expr neg_condition = MakeExprNot(condition); - if_node = ASTBase::Make(parent, neg_condition); - } - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(goto_node, if_node); - zipper.InsertAfter(if_node, prev); - sub_zipper->Remove(goto_node); -} - -void ASTManager::MoveOutward(ASTNode goto_node) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode parent = goto_node->GetParent(); - ASTZipper& zipper2 = parent->GetManager(); - const ASTNode grandpa = parent->GetParent(); - const bool is_loop = parent->IsLoop(); - const bool is_else = parent->IsIfElse(); - const bool is_if = parent->IsIfThen(); - - const ASTNode prev = goto_node->GetPrevious(); - const ASTNode post = goto_node->GetNext(); - - const Expr 
condition = goto_node->GetGotoCondition(); - zipper.DetachSingle(goto_node); - if (is_loop) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr(var_index); - const ASTNode var_node = ASTBase::Make(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make(parent, var_index, false_condition); - zipper2.InsertBefore(var_node_init, parent); - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - const ASTNode break_node = ASTBase::Make(parent, var_condition); - zipper.InsertAfter(break_node, var_node); - } else if (is_if || is_else) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr(var_index); - const ASTNode var_node = ASTBase::Make(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make(parent, var_index, false_condition); - if (is_if) { - zipper2.InsertBefore(var_node_init, parent); - } else { - zipper2.InsertBefore(var_node_init, parent->GetPrevious()); - } - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - if (post) { - zipper.DetachTail(post); - const ASTNode if_node = ASTBase::Make(parent, MakeExprNot(var_condition)); - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(post, if_node); - zipper.InsertAfter(if_node, var_node); - } - } else { - UNREACHABLE(); - } - const ASTNode next = parent->GetNext(); - if (is_if && next && next->IsIfElse()) { - zipper2.InsertAfter(goto_node, next); - goto_node->SetParent(grandpa); - return; - } - zipper2.InsertAfter(goto_node, parent); - goto_node->SetParent(grandpa); -} - -class ASTClearer { -public: - ASTClearer() = default; - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - 
void operator()(const ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) { - ast.nodes.clear(); - } - - void operator()([[maybe_unused]] const ASTVarSet& ast) {} - - void operator()([[maybe_unused]] const ASTLabel& ast) {} - - void operator()([[maybe_unused]] const ASTGoto& ast) {} - - void operator()(const ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTReturn& ast) {} - - void operator()([[maybe_unused]] const ASTBreak& ast) {} - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - node->Clear(); - } -}; - -void ASTManager::Clear() { - if (!main_node) { - return; - } - ASTClearer clearer{}; - clearer.Visit(main_node); - main_node.reset(); - program = nullptr; - labels_map.clear(); - labels.clear(); - gotos.clear(); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "video_core/shader/expr.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -class ASTBase; -class ASTBlockDecoded; -class ASTBlockEncoded; -class ASTBreak; -class ASTDoWhile; -class ASTGoto; -class ASTIfElse; -class ASTIfThen; -class ASTLabel; -class ASTProgram; -class ASTReturn; -class ASTVarSet; - -using ASTData = std::variant; - -using ASTNode = std::shared_ptr; - -enum class ASTZipperType : u32 { - Program, - IfThen, - IfElse, - Loop, -}; - -class ASTZipper final { -public: - explicit ASTZipper(); - - void Init(ASTNode first, ASTNode parent); - - ASTNode GetFirst() const { - return first; - } - - ASTNode GetLast() const { - return last; - } - - void PushBack(ASTNode new_node); - void PushFront(ASTNode new_node); - void InsertAfter(ASTNode new_node, ASTNode at_node); - void InsertBefore(ASTNode new_node, ASTNode at_node); - void DetachTail(ASTNode node); - void DetachSingle(ASTNode node); - void DetachSegment(ASTNode start, ASTNode end); - void Remove(ASTNode node); - - ASTNode first; - ASTNode last; -}; - -class ASTProgram { -public: - ASTZipper nodes{}; -}; - -class ASTIfThen { -public: - explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTIfElse { -public: - ASTZipper nodes{}; -}; - -class ASTBlockEncoded { -public: - explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} - u32 start; - u32 end; -}; - -class ASTBlockDecoded { -public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} - NodeBlock nodes; -}; - -class ASTVarSet { -public: - explicit ASTVarSet(u32 index_, Expr condition_) - : index{index_}, condition{std::move(condition_)} {} - - u32 index; - Expr condition; -}; - -class ASTLabel { -public: - explicit ASTLabel(u32 index_) : index{index_} {} - u32 index; - bool unused{}; -}; - -class ASTGoto { 
-public: - explicit ASTGoto(Expr condition_, u32 label_) - : condition{std::move(condition_)}, label{label_} {} - - Expr condition; - u32 label; -}; - -class ASTDoWhile { -public: - explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTReturn { -public: - explicit ASTReturn(Expr condition_, bool kills_) - : condition{std::move(condition_)}, kills{kills_} {} - - Expr condition; - bool kills; -}; - -class ASTBreak { -public: - explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; -}; - -class ASTBase { -public: - explicit ASTBase(ASTNode parent_, ASTData data_) - : data{std::move(data_)}, parent{std::move(parent_)} {} - - template - static ASTNode Make(ASTNode parent, Args&&... args) { - return std::make_shared(std::move(parent), - ASTData(U(std::forward(args)...))); - } - - void SetParent(ASTNode new_parent) { - parent = std::move(new_parent); - } - - ASTNode& GetParent() { - return parent; - } - - const ASTNode& GetParent() const { - return parent; - } - - u32 GetLevel() const { - u32 level = 0; - auto next_parent = parent; - while (next_parent) { - next_parent = next_parent->GetParent(); - level++; - } - return level; - } - - ASTData* GetInnerData() { - return &data; - } - - const ASTData* GetInnerData() const { - return &data; - } - - ASTNode GetNext() const { - return next; - } - - ASTNode GetPrevious() const { - return previous; - } - - ASTZipper& GetManager() { - return *manager; - } - - const ASTZipper& GetManager() const { - return *manager; - } - - std::optional GetGotoLabel() const { - if (const auto* inner = std::get_if(&data)) { - return {inner->label}; - } - return std::nullopt; - } - - Expr GetGotoCondition() const { - if (const auto* inner = std::get_if(&data)) { - return inner->condition; - } - return nullptr; - } - - void MarkLabelUnused() { - if (auto* inner = std::get_if(&data)) { - inner->unused = true; - } - } - - bool IsLabelUnused() 
const { - if (const auto* inner = std::get_if(&data)) { - return inner->unused; - } - return true; - } - - std::optional GetLabelIndex() const { - if (const auto* inner = std::get_if(&data)) { - return {inner->index}; - } - return std::nullopt; - } - - Expr GetIfCondition() const { - if (const auto* inner = std::get_if(&data)) { - return inner->condition; - } - return nullptr; - } - - void SetGotoCondition(Expr new_condition) { - if (auto* inner = std::get_if(&data)) { - inner->condition = std::move(new_condition); - } - } - - bool IsIfThen() const { - return std::holds_alternative(data); - } - - bool IsIfElse() const { - return std::holds_alternative(data); - } - - bool IsBlockEncoded() const { - return std::holds_alternative(data); - } - - void TransformBlockEncoded(NodeBlock&& nodes) { - data = ASTBlockDecoded(std::move(nodes)); - } - - bool IsLoop() const { - return std::holds_alternative(data); - } - - ASTZipper* GetSubNodes() { - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - return nullptr; - } - - void Clear() { - next.reset(); - previous.reset(); - parent.reset(); - manager = nullptr; - } - -private: - friend class ASTZipper; - - ASTData data; - ASTNode parent; - ASTNode next; - ASTNode previous; - ASTZipper* manager{}; -}; - -class ASTManager final { -public: - explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); - ~ASTManager(); - - ASTManager(const ASTManager& o) = delete; - ASTManager& operator=(const ASTManager& other) = delete; - - ASTManager(ASTManager&& other) noexcept = default; - ASTManager& operator=(ASTManager&& other) noexcept = default; - - void Init(); - - void DeclareLabel(u32 address); - - void InsertLabel(u32 address); - - void InsertGoto(Expr 
condition, u32 address); - - void InsertBlock(u32 start_address, u32 end_address); - - void InsertReturn(Expr condition, bool kills); - - std::string Print() const; - - void Decompile(); - - void ShowCurrentState(std::string_view state) const; - - void SanityCheck() const; - - void Clear(); - - bool IsFullyDecompiled() const { - if (full_decompile) { - return gotos.empty(); - } - - for (ASTNode goto_node : gotos) { - auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return false; - } - ASTNode glabel = labels[*label_index]; - if (IsBackwardsJump(goto_node, glabel)) { - return false; - } - } - return true; - } - - ASTNode GetProgram() const { - return main_node; - } - - u32 GetVariables() const { - return variables; - } - - const std::vector& GetLabels() const { - return labels; - } - -private: - bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; - - bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - void EncloseDoWhile(ASTNode goto_node, ASTNode label); - - void EncloseIfThen(ASTNode goto_node, ASTNode label); - - void MoveOutward(ASTNode goto_node); - - u32 NewVariable() { - return variables++; - } - - bool full_decompile{}; - bool disable_else_derivation{}; - std::unordered_map labels_map{}; - u32 labels_count{}; - std::vector labels{}; - std::list gotos{}; - u32 variables{}; - ASTProgram* program{}; - ASTNode main_node{}; - Expr false_condition{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/shader/async_shaders.h" - -namespace VideoCommon::Shader { - -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} - -AsyncShaders::~AsyncShaders() { - KillWorkers(); -} - -void AsyncShaders::AllocateWorkers() { - // Use at least one thread - u32 num_workers = 1; - - // Deduce how many more threads we can use - const u32 thread_count = std::thread::hardware_concurrency(); - if (thread_count >= 8) { - // Increase async workers by 1 for every 2 threads >= 8 - num_workers += 1 + (thread_count - 8) / 2; - } - - // If we already have workers queued, ignore - if (num_workers == worker_threads.size()) { - return; - } - - // If workers already exist, clear them - if (!worker_threads.empty()) { - FreeWorkers(); - } - - // Create workers - for (std::size_t i = 0; i < num_workers; i++) { - context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, - context_list[i].get()); - } -} - -void AsyncShaders::FreeWorkers() { - // Mark all threads to quit - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.join(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -void AsyncShaders::KillWorkers() { - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.detach(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -bool AsyncShaders::HasWorkQueued() const { - return !pending_queue.empty(); -} - -bool AsyncShaders::HasCompletedWork() const { - std::shared_lock lock{completed_mutex}; - return !finished_work.empty(); -} - -bool 
AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { - const auto& regs = gpu.Maxwell3D().regs; - - // If something is using depth, we can assume that games are not rendering anything which will - // be used one time. - if (regs.zeta_enable) { - return true; - } - - // If games are using a small index count, we can assume these are full screen quads. Usually - // these shaders are only used once for building textures so we can assume they can't be built - // async - if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { - return false; - } - - return true; -} - -std::vector AsyncShaders::GetCompletedWork() { - std::vector results; - { - std::unique_lock lock{completed_mutex}; - results = std::move(finished_work); - finished_work.clear(); - } - return results; -} - -void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, - Tegra::Engines::ShaderType shader_type, u64 uid, - std::vector code, std::vector code_b, - u32 main_offset, CompilerSettings compiler_settings, - const Registry& registry, VAddr cpu_addr) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = device.UseAssemblyShaders() ? 
Backend::GLASM : Backend::OpenGL, - .device = &device, - .shader_type = shader_type, - .uid = uid, - .code = std::move(code), - .code_b = std::move(code_b), - .main_offset = main_offset, - .compiler_settings = compiler_settings, - .registry = registry, - .cpu_address = cpu_addr, - .pp_cache = nullptr, - .vk_device = nullptr, - .scheduler = nullptr, - .descriptor_pool = nullptr, - .update_descriptor_queue = nullptr, - .bindings{}, - .program{}, - .key{}, - .num_color_buffers = 0, - }); - cv.notify_one(); -} - -void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector bindings, - Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = Backend::Vulkan, - .device = nullptr, - .shader_type{}, - .uid = 0, - .code{}, - .code_b{}, - .main_offset = 0, - .compiler_settings{}, - .registry{}, - .cpu_address = 0, - .pp_cache = pp_cache, - .vk_device = &device, - .scheduler = &scheduler, - .descriptor_pool = &descriptor_pool, - .update_descriptor_queue = &update_descriptor_queue, - .bindings = std::move(bindings), - .program = std::move(program), - .key = key, - .num_color_buffers = num_color_buffers, - }); - cv.notify_one(); -} - -void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - while (!is_thread_exiting.load(std::memory_order_relaxed)) { - std::unique_lock lock{queue_mutex}; - cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); - if (is_thread_exiting) { - return; - } - - // Partial lock to allow all threads to read at the same time - if (!HasWorkQueued()) { - continue; - } - // Another thread beat us, just unlock and wait for the next load - if (pending_queue.empty()) { - continue; - } - - // Pull work from queue - 
WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop(); - lock.unlock(); - - if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); - const auto scope = context->Acquire(); - auto program = - OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); - Result result{}; - result.backend = work.backend; - result.cpu_address = work.cpu_address; - result.uid = work.uid; - result.code = std::move(work.code); - result.code_b = std::move(work.code_b); - result.shader_type = work.shader_type; - - if (work.backend == Backend::OpenGL) { - result.program.opengl = std::move(program->source_program); - } else if (work.backend == Backend::GLASM) { - result.program.glasm = std::move(program->assembly_program); - } - - { - std::unique_lock complete_lock(completed_mutex); - finished_work.push_back(std::move(result)); - } - } else if (work.backend == Backend::Vulkan) { - auto pipeline = std::make_unique( - *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, work.key, work.bindings, work.program, - work.num_color_buffers); - - work.pp_cache->EmplacePipeline(std::move(pipeline)); - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- a/src/video_core/shader/async_shaders.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Core::Frontend { -class EmuWindow; -class GraphicsContext; -} // namespace Core::Frontend - -namespace Tegra { -class GPU; -} - -namespace Vulkan { -class VKPipelineCache; -} - -namespace VideoCommon::Shader { - -class AsyncShaders { -public: - enum class Backend { - OpenGL, - GLASM, - Vulkan, - }; - - struct ResultPrograms { - OpenGL::OGLProgram opengl; - OpenGL::OGLAssemblyProgram glasm; - }; - - struct Result { - u64 uid; - VAddr cpu_address; - Backend backend; - ResultPrograms program; - std::vector code; - std::vector code_b; - Tegra::Engines::ShaderType shader_type; - }; - - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); - ~AsyncShaders(); - - /// Start up shader worker threads - void AllocateWorkers(); - - /// Clear the shader queue and kill all worker threads - void FreeWorkers(); - - // Force end all threads - void KillWorkers(); - - /// Check to see if any shaders have actually been compiled - [[nodiscard]] bool HasCompletedWork() const; - - /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build - /// every shader async as some shaders are only built and executed once. 
We try to "guess" which - /// shader would be used only once - [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; - - /// Pulls completed compiled shaders - [[nodiscard]] std::vector GetCompletedWork(); - - void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, - u64 uid, std::vector code, std::vector code_b, u32 main_offset, - CompilerSettings compiler_settings, const Registry& registry, - VAddr cpu_addr); - - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, - Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, - u32 num_color_buffers); - -private: - void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); - - /// Check our worker queue to see if we have any work queued already - [[nodiscard]] bool HasWorkQueued() const; - - struct WorkerParams { - Backend backend; - // For OGL - const OpenGL::Device* device; - Tegra::Engines::ShaderType shader_type; - u64 uid; - std::vector code; - std::vector code_b; - u32 main_offset; - CompilerSettings compiler_settings; - std::optional registry; - VAddr cpu_address; - - // For Vulkan - Vulkan::VKPipelineCache* pp_cache; - const Vulkan::Device* vk_device; - Vulkan::VKScheduler* scheduler; - Vulkan::VKDescriptorPool* descriptor_pool; - Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - std::vector bindings; - Vulkan::SPIRVProgram program; - Vulkan::GraphicsPipelineCacheKey key; - u32 num_color_buffers; - }; - - std::condition_variable cv; - mutable std::mutex queue_mutex; - mutable std::shared_mutex completed_mutex; - std::atomic is_thread_exiting{}; - std::vector> context_list; - std::vector worker_threads; - std::queue pending_queue; - std::vector finished_work; - Core::Frontend::EmuWindow& emu_window; -}; - -} // namespace 
VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/shader/compiler_settings.h" - -namespace VideoCommon::Shader { - -std::string CompileDepthAsString(const CompileDepth cd) { - switch (cd) { - case CompileDepth::BruteForce: - return "Brute Force Compile"; - case CompileDepth::FlowStack: - return "Simple Flow Stack Mode"; - case CompileDepth::NoFlowStack: - return "Remove Flow Stack"; - case CompileDepth::DecompileBackwards: - return "Decompile Backward Jumps"; - case CompileDepth::FullDecompile: - return "Full Decompilation"; - default: - return "Unknown Compiler Process"; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class CompileDepth : u32 { - BruteForce = 0, - FlowStack = 1, - NoFlowStack = 2, - DecompileBackwards = 3, - FullDecompile = 4, -}; - -std::string CompileDepthAsString(CompileDepth cd); - -struct CompilerSettings { - CompileDepth depth{CompileDepth::NoFlowStack}; - bool disable_else_derivation{true}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -constexpr s32 unassigned_branch = -2; - -struct Query { - u32 address{}; - std::stack ssy_stack{}; - std::stack pbk_stack{}; -}; - -struct BlockStack { - BlockStack() = default; - explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - std::stack ssy_stack{}; - std::stack pbk_stack{}; -}; - -template -BlockBranchInfo MakeBranchInfo(Args&&... 
args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -bool BlockBranchIsIgnored(BlockBranchInfo first) { - bool ignore = false; - if (std::holds_alternative(*first)) { - const auto branch = std::get_if(first.get()); - ignore = branch->ignore; - } - return ignore; -} - -struct BlockInfo { - u32 start{}; - u32 end{}; - bool visited{}; - BlockBranchInfo branch{}; - - bool IsInside(const u32 address) const { - return start <= address && address <= end; - } -}; - -struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) - : program_code{program_code_}, registry{registry_}, start{start_} {} - - const ProgramCode& program_code; - Registry& registry; - u32 start{}; - std::vector block_info; - std::list inspect_queries; - std::list queries; - std::unordered_map registered; - std::set labels; - std::map ssy_labels; - std::map pbk_labels; - std::unordered_map stacks; - ASTManager* manager{}; -}; - -enum class BlockCollision : u32 { None, Found, Inside }; - -std::pair TryGetBlock(CFGRebuildState& state, u32 address) { - const auto& blocks = state.block_info; - for (u32 index = 0; index < blocks.size(); index++) { - if (blocks[index].start == address) { - return {BlockCollision::Found, index}; - } - if (blocks[index].IsInside(address)) { - return {BlockCollision::Inside, index}; - } - } - return {BlockCollision::None, 0xFFFFFFFF}; -} - -struct ParseInfo { - BlockBranchInfo branch_info{}; - u32 end_address{}; -}; - -BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { - auto& it = state.block_info.emplace_back(); - it.start = start; - it.end = end; - const u32 index = static_cast(state.block_info.size() - 1); - state.registered.insert({start, index}); - return it; -} - -Pred GetPredicate(u32 index, bool negated) { - return static_cast(static_cast(index) + (negated ? 
8ULL : 0ULL)); -} - -enum class ParseResult : u32 { - ControlCaught, - BlockEnd, - AbnormalFlow, -}; - -struct BranchIndirectInfo { - u32 buffer{}; - u32 offset{}; - u32 entries{}; - s32 relative_position{}; -}; - -struct BufferInfo { - u32 index; - u32 offset; -}; - -std::optional> GetBRXInfo(const CFGRebuildState& state, u32& pos) { - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() != OpCode::Id::BRX) { - return std::nullopt; - } - if (instr.brx.constant_buffer != 0) { - return std::nullopt; - } - --pos; - return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); -} - -template -// requires std::predicate -// requires std::invocable -std::optional TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, - PackCallable pack) { - for (; pos >= state.start; --pos) { - if (IsSchedInstruction(pos, state.start)) { - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (!opcode) { - continue; - } - if (test(instr, opcode->get())) { - --pos; - return std::make_optional(pack(instr, opcode->get())); - } - } - return std::nullopt; -} - -std::optional> TrackLDC(const CFGRebuildState& state, u32& pos, - u64 brx_tracked_register) { - return TrackInstruction>( - state, pos, - [brx_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::LD_C && - instr.gpr0.Value() == brx_tracked_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; - }, - [](auto instr, const auto& opcode) { - const BufferInfo info = {static_cast(instr.cbuf36.index.Value()), - static_cast(instr.cbuf36.GetOffset())}; - return std::make_pair(info, instr.gpr8.Value()); - }); -} - -std::optional TrackSHLRegister(const CFGRebuildState& state, u32& pos, - u64 ldc_tracked_register) { - return TrackInstruction( - state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return 
opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); -} - -std::optional TrackIMNMXValue(const CFGRebuildState& state, u32& pos, - u64 shl_tracked_register) { - return TrackInstruction( - state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast(instr.alu.GetSignedImm20_20() + 1); - }); -} - -std::optional TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { - const auto brx_info = GetBRXInfo(state, pos); - if (!brx_info) { - return std::nullopt; - } - const auto [relative_position, brx_tracked_register] = *brx_info; - - const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); - if (!ldc_info) { - return std::nullopt; - } - const auto [buffer_info, ldc_tracked_register] = *ldc_info; - - const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); - if (!shl_tracked_register) { - return std::nullopt; - } - - const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); - if (!entries) { - return std::nullopt; - } - - return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; -} - -std::pair ParseCode(CFGRebuildState& state, u32 address) { - u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_code.size()); - ParseInfo parse_info{}; - SingleBranch single_branch{}; - - const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { - const auto pair = rebuild_state.labels.emplace(label_address); - if (pair.second) { - rebuild_state.inspect_queries.push_back(label_address); - } - }; - - while (true) { - if (offset >= end_address) { - // ASSERT_OR_EXECUTE can't be used, as it ignores the break - ASSERT_MSG(false, "Shader passed the current limit!"); - - 
single_branch.address = exit_branch; - single_branch.ignore = false; - break; - } - if (state.registered.contains(offset)) { - single_branch.address = offset; - single_branch.ignore = true; - break; - } - if (IsSchedInstruction(offset, state.start)) { - offset++; - continue; - } - const Instruction instr = {state.program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { - offset++; - continue; - } - - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRA: { - if (instr.bra.constant_buffer != 0) { - return {ParseResult::AbnormalFlow, parse_info}; - } - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - const u32 branch_offset = offset + instr.bra.GetBranchTarget(); - if (branch_offset == 0) { - 
single_branch.address = exit_branch; - } else { - single_branch.address = branch_offset; - } - insert_label(state, branch_offset); - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SYNC: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = true; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRK: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = true; - single_branch.ignore = false; - 
parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::KIL: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = true; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SSY: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.ssy_labels.emplace(offset, target); - break; - } - case OpCode::Id::PBK: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.pbk_labels.emplace(offset, target); - break; - } - case OpCode::Id::BRX: { - const auto tmp = TrackBranchIndirectInfo(state, offset); - if (!tmp) { - LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); - return {ParseResult::AbnormalFlow, parse_info}; - } - - const auto result = *tmp; - const s32 pc_target = offset + result.relative_position; - std::vector branches; - for (u32 i = 0; i < result.entries; i++) { - auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); - if (!key) { - return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = 
*key; - u32 target = static_cast((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - static_cast(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } - default: - break; - } - - offset++; - } - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - parse_info.end_address = offset - 1; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, - single_branch.is_brk, single_branch.ignore); - return {ParseResult::BlockEnd, parse_info}; -} - -bool TryInspectAddress(CFGRebuildState& state) { - if (state.inspect_queries.empty()) { - return false; - } - - const u32 address = state.inspect_queries.front(); - state.inspect_queries.pop_front(); - const auto [result, block_index] = TryGetBlock(state, address); - switch (result) { - case BlockCollision::Found: { - return true; - } - case BlockCollision::Inside: { - // This case is the tricky one: - // We need to split the block into 2 separate blocks - const u32 end = state.block_info[block_index].end; - BlockInfo& new_block = CreateBlockInfo(state, address, end); - BlockInfo& current_block = state.block_info[block_index]; - current_block.end = address - 1; - new_block.branch = std::move(current_block.branch); - BlockBranchInfo forward_branch = MakeBranchInfo(); - const auto branch = std::get_if(forward_branch.get()); - branch->address = address; - branch->ignore = true; - current_block.branch = std::move(forward_branch); - return true; - } - default: - break; - } - const auto [parse_result, parse_info] = ParseCode(state, address); - if (parse_result == ParseResult::AbnormalFlow) { - // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction - return false; - } - - BlockInfo& block_info = CreateBlockInfo(state, address, 
parse_info.end_address); - block_info.branch = parse_info.branch_info; - if (std::holds_alternative(*block_info.branch)) { - const auto branch = std::get_if(block_info.branch.get()); - if (branch->condition.IsUnconditional()) { - return true; - } - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); - return true; - } - return true; -} - -bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack& cc, std::map& labels, - BlockInfo& block) { - auto gather_start = labels.lower_bound(block.start); - const auto gather_end = labels.upper_bound(block.end); - while (gather_start != gather_end) { - cc.push(gather_start->second); - ++gather_start; - } - }; - if (state.queries.empty()) { - return false; - } - - Query& q = state.queries.front(); - const u32 block_index = state.registered[q.address]; - BlockInfo& block = state.block_info[block_index]; - // If the block is visited, check if the stacks match, else gather the ssy/pbk - // labels into the current stack and look if the branch at the end of the block - // consumes a label. 
Schedule new queries accordingly - if (block.visited) { - BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); - state.queries.pop_front(); - return all_okay; - } - block.visited = true; - state.stacks.insert_or_assign(q.address, BlockStack{q}); - - Query q2(q); - state.queries.pop_front(); - gather_labels(q2.ssy_stack, state.ssy_labels, block); - gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (std::holds_alternative(*block.branch)) { - auto* branch = std::get_if(block.branch.get()); - if (!branch->condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } - - auto& conditional_query = state.queries.emplace_back(q2); - if (branch->is_sync) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.ssy_stack.top(); - } - conditional_query.ssy_stack.pop(); - } - if (branch->is_brk) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.pbk_stack.top(); - } - conditional_query.pbk_stack.pop(); - } - conditional_query.address = branch->address; - return true; - } - - const auto* multi_branch = std::get_if(block.branch.get()); - for (const auto& branch_case : multi_branch->branches) { - auto& conditional_query = state.queries.emplace_back(q2); - conditional_query.address = branch_case.address; - } - - return true; -} - -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = [](const Condition& cond) -> Expr { - Expr result; - if (cond.cc != ConditionCode::T) { - result = MakeExpr(cond.cc); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast(cond.predicate); - bool negate = false; - if (pred > 7) { - negate = true; - pred -= 8; - } - Expr extra = MakeExpr(pred); - if (negate) { - extra = MakeExpr(std::move(extra)); - } - if (result) { - return 
MakeExpr(std::move(extra), std::move(result)); - } - return extra; - } - if (result) { - return result; - } - return MakeExpr(true); - }; - - if (std::holds_alternative(*branch_info)) { - const auto* branch = std::get_if(branch_info.get()); - if (branch->address < 0) { - if (branch->kill) { - mm.InsertReturn(get_expr(branch->condition), true); - return; - } - mm.InsertReturn(get_expr(branch->condition), false); - return; - } - mm.InsertGoto(get_expr(branch->condition), branch->address); - return; - } - const auto* multi_branch = std::get_if(branch_info.get()); - for (const auto& branch_case : multi_branch->branches) { - mm.InsertGoto(MakeExpr(multi_branch->gpr, branch_case.cmp_value), - branch_case.address); - } -} - -void DecompileShader(CFGRebuildState& state) { - state.manager->Init(); - for (auto label : state.labels) { - state.manager->DeclareLabel(label); - } - for (const auto& block : state.block_info) { - if (state.labels.contains(block.start)) { - state.manager->InsertLabel(block.start); - } - const bool ignore = BlockBranchIsIgnored(block.branch); - const u32 end = ignore ? 
block.end + 1 : block.end; - state.manager->InsertBlock(block.start, end); - if (!ignore) { - InsertBranch(*state.manager, block.branch); - } - } - state.manager->Decompile(); -} - -} // Anonymous namespace - -std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry) { - auto result_out = std::make_unique(); - if (settings.depth == CompileDepth::BruteForce) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - - CFGRebuildState state{program_code, start_address, registry}; - // Inspect Code and generate blocks - state.labels.clear(); - state.labels.emplace(start_address); - state.inspect_queries.push_back(state.start); - while (!state.inspect_queries.empty()) { - if (!TryInspectAddress(state)) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - } - - bool use_flow_stack = true; - - bool decompiled = false; - - if (settings.depth != CompileDepth::FlowStack) { - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; - } - } - } - - use_flow_stack = !decompiled; - - // Sort and organize results - std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - if (decompiled && settings.depth != CompileDepth::NoFlowStack) { - ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, - settings.disable_else_derivation}; - state.manager = &manager; - DecompileShader(state); - decompiled = state.manager->IsFullyDecompiled(); - if (!decompiled) { - if (settings.depth == CompileDepth::FullDecompile) { - LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); - } else { - LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); - } - state.manager->ShowCurrentState("Of Shader"); - 
state.manager->Clear(); - } else { - auto characteristics = std::make_unique(); - characteristics->start = start_address; - characteristics->settings.depth = settings.depth; - characteristics->manager = std::move(manager); - characteristics->end = state.block_info.back().end + 1; - return characteristics; - } - } - - result_out->start = start_address; - result_out->settings.depth = - use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; - result_out->blocks.clear(); - for (auto& block : state.block_info) { - ShaderBlock new_block{}; - new_block.start = block.start; - new_block.end = block.end; - new_block.ignore_branch = BlockBranchIsIgnored(block.branch); - if (!new_block.ignore_branch) { - new_block.branch = block.branch; - } - result_out->end = std::max(result_out->end, block.end); - result_out->blocks.push_back(new_block); - } - if (!use_flow_stack) { - result_out->labels = std::move(state.labels); - return result_out; - } - - auto back = result_out->blocks.begin(); - auto next = std::next(back); - while (next != result_out->blocks.end()) { - if (!state.labels.contains(next->start) && next->start == back->end + 1) { - back->end = next->end; - next = result_out->blocks.erase(next); - continue; - } - back = next; - ++next; - } - - return result_out; -} -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include - -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -constexpr s32 exit_branch = -1; - -struct Condition { - Pred predicate{Pred::UnusedIndex}; - ConditionCode cc{ConditionCode::T}; - - bool IsUnconditional() const { - return predicate == Pred::UnusedIndex && cc == ConditionCode::T; - } - - bool operator==(const Condition& other) const { - return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); - } - - bool operator!=(const Condition& other) const { - return !operator==(other); - } -}; - -class SingleBranch { -public: - SingleBranch() = default; - explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, - bool is_brk_, bool ignore_) - : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, - ignore{ignore_} {} - - bool operator==(const SingleBranch& b) const { - return std::tie(condition, address, kill, is_sync, is_brk, ignore) == - std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); - } - - bool operator!=(const SingleBranch& b) const { - return !operator==(b); - } - - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; - -struct CaseBranch { - explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} - u32 cmp_value; - u32 address; -}; - -class MultiBranch { -public: - explicit MultiBranch(u32 gpr_, std::vector&& branches_) - : gpr{gpr_}, branches{std::move(branches_)} {} - - u32 gpr{}; - std::vector branches{}; -}; - -using BranchData = std::variant; -using BlockBranchInfo = std::shared_ptr; - -bool 
BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); - -struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; - BlockBranchInfo branch{}; - - bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch) == - std::tie(sb.start, sb.end, sb.ignore_branch) && - BlockBranchInfoAreEqual(branch, sb.branch); - } - - bool operator!=(const ShaderBlock& sb) const { - return !operator==(sb); - } -}; - -struct ShaderCharacteristics { - std::list blocks{}; - std::set labels{}; - u32 start{}; - u32 end{}; - ASTManager manager{true, true}; - CompilerSettings settings{}; -}; - -std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { - -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { - if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { - return; - } - u32 count{}; - std::vector bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - ++count; - bound_offsets.emplace_back(sampler.offset); - } - if (count > 1) { - gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); - } -} - -std::optional TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, - VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { - const u32 base_offset = sampler_to_deduce.offset; - u32 max_offset{std::numeric_limits::max()}; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - if (sampler.offset > base_offset) { - max_offset = std::min(sampler.offset, max_offset); - } - } - if (max_offset == std::numeric_limits::max()) { - return std::nullopt; - } - return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); -} - -} // Anonymous namespace - -class ASTDecoder { -public: - explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} - - void operator()(ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = 
current->GetNext(); - } - } - - void operator()(ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) {} - - void operator()(ASTVarSet& ast) {} - - void operator()(ASTLabel& ast) {} - - void operator()(ASTGoto& ast) {} - - void operator()(ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTReturn& ast) {} - - void operator()(ASTBreak& ast) {} - - void Visit(ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - if (node->IsBlockEncoded()) { - auto block = std::get_if(node->GetInnerData()); - NodeBlock bb = ir.DecodeRange(block->start, block->end); - node->TransformBlockEncoded(std::move(bb)); - } - } - -private: - ShaderIR& ir; -}; - -void ShaderIR::Decode() { - std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - - decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, registry); - auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if 
(!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = GetASTProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); - } -} - -NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - NodeBlock basic_block; - DecodeRangeInner(basic_block, begin, end); - return basic_block; -} - -void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { - for (u32 pc = begin; pc < (begin > end ? 
MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(bb, pc); - } -} - -void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { - Node result = n; - if (cond.cc != ConditionCode::T) { - result = Conditional(GetConditionCode(cond.cc), {result}); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast(cond.predicate); - const bool is_neg = pred > 7; - if (is_neg) { - pred -= 8; - } - result = Conditional(GetPredicate(pred, is_neg), {result}); - } - return result; - }; - if (std::holds_alternative(*block.branch)) { - auto branch = std::get_if(block.branch.get()); - if (branch->address < 0) { - if (branch->kill) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Branch, Immediate(branch->address)); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - auto multi_branch = std::get_if(block.branch.get()); - Node op_a = GetRegister(multi_branch->gpr); - for (auto& branch_case : multi_branch->branches) { - Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); - Node op_b = Immediate(branch_case.cmp_value); - Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); - auto result = Conditional(condition, {n}); - bb.push_back(result); - global_code.push_back(result); - } -} - -u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { - // Ignore sched instructions when generating code. 
- if (IsSchedInstruction(pc, main_offset)) { - return pc + 1; - } - - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - const u32 nv_address = ConvertAddressToNvidiaSpace(pc); - - // Decoding failure - if (!opcode) { - UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); - bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", - nv_address, instr.value))); - return pc + 1; - } - - bb.push_back(Comment( - fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); - - using Tegra::Shader::Pred; - UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, - "NeverExecute predicate not implemented"); - - static const std::map decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, - {OpCode::Type::Image, &ShaderIR::DecodeImage}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, 
&ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; - - std::vector tmp_block; - if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, pc); - } else { - pc = DecodeOther(tmp_block, pc); - } - - // Some instructions (like SSY) don't have a predicate field, they are always unconditionally - // executed. - const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); - const auto pred_index = static_cast(instr.pred.pred_index); - - if (can_be_predicated && pred_index != static_cast(Pred::UnusedIndex)) { - const Node conditional = - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); - global_code.push_back(conditional); - bb.push_back(conditional); - } else { - for (auto& node : tmp_block) { - global_code.push_back(node); - bb.push_back(node); - } - } - - return pc + 1; -} - -void ShaderIR::PostDecode() { - // Deduce texture handler size if needed - auto gpu_driver = registry.AccessGuestDriverProfile(); - DeduceTextureHandlerSize(gpu_driver, used_samplers); - // Deduce Indexed Samplers - if (!uses_indexed_samplers) { - return; - } - for (auto& sampler : used_samplers) { - if (!sampler.is_indexed) { - continue; - } - if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { - sampler.size = *size; - } else { - LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); - sampler.size = 1; - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp 
b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::SubOp; - -u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - - Node op_b = [&] { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV_C: - case OpCode::Id::MOV_R: { - // MOV does not have neither 'abs' nor 'neg' bits. - SetRegister(bb, instr.gpr0, op_b); - break; - } - case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: - case OpCode::Id::FMUL_IMM: { - // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. 
- if (instr.fmul.tab5cb8_2 != 0) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - } - if (instr.fmul.tab5c68_0 != 1) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); - } - - op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - - static constexpr std::array FmulPostFactor = { - 1.000f, // None - 0.500f, // Divide 2 - 0.250f, // Divide 4 - 0.125f, // Divide 8 - 8.000f, // Mul 8 - 4.000f, // Mul 4 - 2.000f, // Mul 2 - }; - - if (instr.fmul.postfactor != 0) { - op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, - Immediate(FmulPostFactor[instr.fmul.postfactor])); - } - - // TODO(Rodrigo): Should precise be used when there's a postfactor? - Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); - - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: - case OpCode::Id::FADD_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::MUFU: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - - Node value = [&]() { - switch (instr.sub_op) { - case SubOp::Cos: - return Operation(OperationCode::FCos, PRECISE, op_a); - case SubOp::Sin: - return Operation(OperationCode::FSin, PRECISE, op_a); - case SubOp::Ex2: - return Operation(OperationCode::FExp2, PRECISE, op_a); - case SubOp::Lg2: - return Operation(OperationCode::FLog2, PRECISE, op_a); - case SubOp::Rcp: - return Operation(OperationCode::FDiv, PRECISE, 
Immediate(1.0f), op_a); - case SubOp::Rsq: - return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); - case SubOp::Sqrt: - return Operation(OperationCode::FSqrt, PRECISE, op_a); - default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); - return Immediate(0); - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FMNMX_C: - case OpCode::Id::FMNMX_R: - case OpCode::Id::FMNMX_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); - - const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); - const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: - case OpCode::Id::FCMP_IMMR: { - UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); - Node op_c = GetRegister(instr.gpr39); - Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); - break; - } - case OpCode::Id::RRO_C: - case OpCode::Id::RRO_R: - case OpCode::Id::RRO_IMM: { - LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); - - // Currently RRO is only implemented as a register move. 
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - SetRegister(bb, instr.gpr0, op_b); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - bool negate_a = false; - bool negate_b = false; - bool absolute_a = false; - bool absolute_b = false; - - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_R: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HADD2_C: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 56) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 
0; - break; - case OpCode::Id::HMUL2_R: - negate_a = ((instr.value >> 43) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HMUL2_C: - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - default: - UNREACHABLE(); - break; - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - - auto [type_b, op_b] = [this, instr, opcode]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HMUL2_C: - return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HADD2_R: - case OpCode::Id::HMUL2_R: - return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; - default: - UNREACHABLE(); - return {HalfType::F32, Immediate(0)}; - } - }(); - op_b = UnpackHalfFloat(op_b, type_b); - op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - - Node value = [this, opcode, op_a, op_b = op_b] { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_C: - case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); - return Immediate(0); - } - }(); - value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- 
a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); - - const Node op_b = UnpackHalfImmediate(instr, true); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); - SetRegister(bb, instr.gpr0, value); - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file 
mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV32_IMM: { - SetRegister(bb, instr.gpr0, GetImmediate32(instr)); - break; - } - case OpCode::Id::FMUL32_IMM: { - Node value = - Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); - value = GetSaturatedFloat(value, instr.fmul32.saturate); - - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD32I: { - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, - instr.fadd32i.negate_a); - const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, - instr.fadd32i.negate_b); - - const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- 
a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::IAdd3Height; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD_C: - case OpCode::Id::IADD_R: - case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); - UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - Node value = Operation(OperationCode::UAdd, op_a, op_b); - - if (instr.iadd.x) { - Node carry = GetInternalFlag(InternalFlag::Carry); - Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); - } - - if (instr.generates_cc) { - const Node i0 = Immediate(0); - - Node zero = Operation(OperationCode::LogicalIEqual, value, i0); - Node sign = Operation(OperationCode::LogicalILessThan, value, i0); - 
Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); - - Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); - Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); - Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); - Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); - - SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); - SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); - SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); - SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::IADD3_C: - case OpCode::Id::IADD3_R: - case OpCode::Id::IADD3_IMM: { - Node op_c = GetRegister(instr.gpr39); - - const auto ApplyHeight = [&](IAdd3Height height, Node value) { - switch (height) { - case IAdd3Height::None: - return value; - case IAdd3Height::LowerHalfWord: - return BitfieldExtract(value, 0, 16); - case IAdd3Height::UpperHalfWord: - return BitfieldExtract(value, 16, 16); - default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); - return Immediate(0); - } - }; - - if (opcode->get().GetId() == OpCode::Id::IADD3_R) { - op_a = ApplyHeight(instr.iadd3.height_a, op_a); - op_b = ApplyHeight(instr.iadd3.height_b, op_b); - op_c = ApplyHeight(instr.iadd3.height_c, op_c); - } - - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); - op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - - const Node value = [&] { - Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); - if (opcode->get().GetId() != OpCode::Id::IADD3_R) { - return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); - } - const Node shifted = [&] { - switch (instr.iadd3.mode) { - case Tegra::Shader::IAdd3Mode::RightShift: - // TODO(tech4me): According to - // 
https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 - // The addition between op_a and op_b should be done in uint33, more - // investigation required - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, - Immediate(16)); - case Tegra::Shader::IAdd3Mode::LeftShift: - return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, - Immediate(16)); - default: - return add_ab; - } - }(); - return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); - }(); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ISCADD_C: - case OpCode::Id::ISCADD_R: - case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - const Node shift = Immediate(static_cast(instr.alu_integer.shift_amount)); - const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::POPC_C: - case OpCode::Id::POPC_R: - case OpCode::Id::POPC_IMM: { - if (instr.popc.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - } - const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FLO_R: - case OpCode::Id::FLO_C: - case OpCode::Id::FLO_IMM: { - Node value; - if (instr.flo.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.is_signed) { - value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); - } else { 
- value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.sh) { - value = - Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::SEL_C: - case OpCode::Id::SEL_R: - case OpCode::Id::SEL_IMM: { - const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); - const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ICMP_CR: - case OpCode::Id::ICMP_R: - case OpCode::Id::ICMP_RC: - case OpCode::Id::ICMP_IMM: { - const Node zero = Immediate(0); - - const auto [op_rhs, test] = [&]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_R: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::ICMP_IMM: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {zero, zero}; - } - }(); - const Node op_lhs = GetRegister(instr.gpr8); - const Node comparison = - GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); - break; - } - case OpCode::Id::LOP_C: - case OpCode::Id::LOP_R: - case OpCode::Id::LOP_IMM: { - if (instr.alu.lop.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); - if (instr.alu.lop.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, - 
instr.generates_cc); - break; - } - case OpCode::Id::LOP3_C: - case OpCode::Id::LOP3_R: - case OpCode::Id::LOP3_IMM: { - const Node op_c = GetRegister(instr.gpr39); - const Node lut = [&]() { - if (opcode->get().GetId() == OpCode::Id::LOP3_R) { - return Immediate(instr.alu.lop3.GetImmLut28()); - } else { - return Immediate(instr.alu.lop3.GetImmLut48()); - } - }(); - - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); - break; - } - case OpCode::Id::IMNMX_C: - case OpCode::Id::IMNMX_R: - case OpCode::Id::IMNMX_IMM: { - UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - - const bool is_signed = instr.imnmx.is_signed; - - const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); - const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); - const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::LEA_R2: - case OpCode::Id::LEA_R1: - case OpCode::Id::LEA_IMM: - case OpCode::Id::LEA_RZ: - case OpCode::Id::LEA_HI: { - auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::LEA_R2: { - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), - Immediate(static_cast(instr.lea.r2.entry_a))}; - } - case OpCode::Id::LEA_R1: { - const bool neg = instr.lea.r1.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - GetRegister(instr.gpr20), - Immediate(static_cast(instr.lea.r1.entry_a))}; - } - case OpCode::Id::LEA_IMM: { - const bool neg = instr.lea.imm.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast(instr.lea.imm.entry_a)), - 
Immediate(static_cast(instr.lea.imm.entry_b))}; - } - case OpCode::Id::LEA_RZ: { - const bool neg = instr.lea.rz.neg != 0; - return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast(instr.lea.rz.entry_a))}; - } - case OpCode::Id::LEA_HI: - default: - UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); - - return {Immediate(static_cast(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), - Immediate(static_cast(instr.lea.imm.entry_b))}; - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast(Pred::UnusedIndex), - "Unhandled LEA Predicate"); - - Node value = - Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); - value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); - SetRegister(bb, instr.gpr0, std::move(value)); - - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut, bool sets_cc) { - const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { - Node value = Immediate(0); - const ImmediateNode imm = std::get(*ttbl); - if (imm.GetValue() & 0x01) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x02) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = 
Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x04) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x08) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x10) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x20) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x40) { - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x80) { - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - return value; - }(op_a, op_b, op_c, imm_lut); - - SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); 
- SetRegister(bb, dest, lop3_fast); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::LogicOperation; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredicateResultMode; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = Immediate(static_cast(instr.alu.imm20_32)); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - - op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - - Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) { - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); - } - - if (instr.alu.lop32i.invert_b) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - - WriteLogicOperation(bb, instr.gpr0, 
instr.alu.lop32i.operation, std::move(op_a), - std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, - instr.op_32.generates_cc != 0); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, - Node op_b, PredicateResultMode predicate_mode, Pred predicate, - bool sets_cc) { - Node result = [&] { - switch (logic_op) { - case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::PassB: - return op_b; - default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); - return Immediate(0); - } - }(); - - SetInternalFlagsFromInteger(bb, result, sets_cc); - SetRegister(bb, dest, result); - - // Write the predicate value depending on the predicate mode. - switch (predicate_mode) { - case PredicateResultMode::None: - // Do nothing. - return; - case PredicateResultMode::NotZero: { - // Set the predicate to true if the result is not zero. 
- Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); - SetPredicate(bb, static_cast(predicate), std::move(compare)); - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::BFE_R: - return GetRegister(instr.gpr20); - case OpCode::Id::BFE_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::BFE_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); - - const bool is_signed = instr.bfe.is_signed; - - // using reverse parallel method in - // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel - // note for later if possible to implement faster method. 
- if (instr.bfe.brev) { - const auto swap = [&](u32 s, u32 mask) { - Node v1 = - SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); - if (mask != 0) { - v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), - Immediate(mask)); - } - Node v2 = op_a; - if (mask != 0) { - v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), - Immediate(mask)); - } - v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), - Immediate(s)); - return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), - std::move(v2)); - }; - op_a = swap(1, 0x55555555U); - op_a = swap(2, 0x33333333U); - op_a = swap(4, 0x0F0F0F0FU); - op_a = swap(8, 0x00FF00FFU); - op_a = swap(16, 0); - } - - const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(0), Immediate(8)); - const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(8), Immediate(8)); - auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); - SetRegister(bb, instr.gpr0, std::move(result)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto [packed_shift, base] = [&]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::BFI_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::BFI_IMM_R: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {Immediate(0), Immediate(0)}; - } - }(); - const Node insert = GetRegister(instr.gpr8); - const Node offset = BitfieldExtract(packed_shift, 0, 8); - const Node bits = BitfieldExtract(packed_shift, 8, 8); - - const Node value = - Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; - -namespace { - -constexpr OperationCode GetFloatSelector(u64 selector) { - return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; -} - -constexpr u32 SizeInBits(Register::Size size) { - switch (size) { - case Register::Size::Byte: - return 8; - case Register::Size::Short: - return 16; - case Register::Size::Word: - return 32; - case Register::Size::Long: - return 64; - } - return 0; -} - -constexpr std::optional> IntegerSaturateBounds(Register::Size src_size, - Register::Size dst_size, - bool src_signed, - bool dst_signed) { - const u32 dst_bits = SizeInBits(dst_size); - if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { - if (src_signed == dst_signed) { - return std::nullopt; - } - return std::make_pair(0, std::numeric_limits::max()); - } - if (dst_signed) { - // Signed destination, clamp to [-128, 127] for instance - return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); - } else { - // Unsigned destination - if (dst_bits == 32) { - // Avoid shifting by 32, that is undefined behavior - return std::make_pair(0, s32(std::numeric_limits::max())); - } - return std::make_pair(0, (1 << dst_bits) - 1); - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - case OpCode::Id::I2I_C: - case OpCode::Id::I2I_IMM: { - const bool src_signed = instr.conversion.is_input_signed; - const bool dst_signed = instr.conversion.is_output_signed; - const Register::Size 
src_size = instr.conversion.src_size; - const Register::Size dst_size = instr.conversion.dst_size; - const u32 selector = static_cast(instr.conversion.int_src.selector); - - Node value = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2I_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - // Ensure the source selector is valid - switch (instr.conversion.src_size) { - case Register::Size::Byte: - break; - case Register::Size::Short: - ASSERT(selector == 0 || selector == 2); - break; - default: - ASSERT(selector == 0); - break; - } - - if (src_size != Register::Size::Word || selector != 0) { - value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), - Immediate(selector * 8), Immediate(SizeInBits(src_size))); - } - - value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, - instr.conversion.negate_a, src_signed); - - if (instr.alu.saturate_d) { - if (src_signed && !dst_signed) { - Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, - Immediate(1 << (SizeInBits(src_size) - 1))); - value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), - std::move(value)); - - // Simplify generated expressions, this can be removed without semantic impact - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - - if (dst_size != Register::Size::Word) { - const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); - Node is_large = - Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); - value = Operation(OperationCode::Select, std::move(is_large), limit, - std::move(value)); - } - } else if (const std::optional bounds = - IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { - value = 
SignedOperation(OperationCode::IMax, src_signed, std::move(value), - Immediate(bounds->first)); - value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), - Immediate(bounds->second)); - } - } else if (dst_size != Register::Size::Word) { - // No saturation, we only have to mask the result - Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); - value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: - case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); - - Node value = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2F_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const bool input_signed = instr.conversion.is_input_signed; - - if (const u32 offset = static_cast(instr.conversion.int_src.selector); offset > 0) { - ASSERT(instr.conversion.src_size == Register::Size::Byte || - instr.conversion.src_size == Register::Size::Short); - if (instr.conversion.src_size == Register::Size::Short) { - ASSERT(offset == 0 || offset == 2); - } - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset * 8)); - } - - value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); - value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); - value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); - value = GetOperandAbsNegFloat(value, false, 
instr.conversion.negate_a); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: - case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2F_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&] { - if (instr.conversion.src_size != instr.conversion.dst_size) { - // Rounding operations only matter when the source and destination conversion size - // is the same. 
- return value; - } - switch (instr.conversion.f2f.GetRoundingMode()) { - case Tegra::Shader::F2fRoundingOp::None: - return value; - case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, value); - case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, value); - case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, value); - case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - instr.conversion.f2f.rounding.Value()); - return value; - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: - case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2I_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&]() { - switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::RoundEven: - return 
Operation(OperationCode::FRoundEven, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - instr.conversion.f2i.rounding.Value()); - return Immediate(0); - } - }(); - const bool is_signed = instr.conversion.is_output_signed; - value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); - - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - if (instr.ffma.tab5980_0 != 1) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); - } - if (instr.ffma.tab5980_1 != 0) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - } - - const Node op_a = GetRegister(instr.gpr8); - - auto [op_b, op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - } - case OpCode::Id::FFMA_RR: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::FFMA_RC: { - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - } - case OpCode::Id::FFMA_IMM: - return {GetImmediate19(instr), GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); - return {Immediate(0), Immediate(0)}; - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); - op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); - - Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp 
b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, - instr.fset.neg_a != 0); - - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); - - // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the - // condition is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fset.op); - const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.fset.bf ? 
Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.fset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, - instr.fsetp.neg_a != 0); - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); - - // We can't use the constant predicate as destination. 
- ASSERT(instr.fsetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node predicate = - GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); - const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); - const Node value = Operation(combiner, predicate, second_pred); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.fsetp.pred3, value); - - if (instr.fsetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - const Node second_value = Operation(combiner, negated_pred, second_pred); - SetPredicate(bb, instr.fsetp.pred0, second_value); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - PredCondition cond{}; - bool bf = false; - bool ftz = false; - bool neg_a = false; - bool abs_a = false; - bool neg_b = false; - bool abs_b = false; - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - case OpCode::Id::HSET2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - bf = instr.Bit(53); - ftz = instr.Bit(54); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(56); - abs_b = instr.Bit(54); - break; - case OpCode::Id::HSET2_R: - cond = instr.hsetp2.reg.cond; - bf = instr.Bit(49); - ftz = instr.Bit(50); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(31); - abs_b = instr.Bit(30); - break; - default: - UNREACHABLE(); - } - - Node op_b = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - // Inform as unimplemented as this is not tested. 
- UNIMPLEMENTED_MSG("HSET2_C is not implemented"); - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::HSET2_R: - return GetRegister(instr.gpr20); - case OpCode::Id::HSET2_IMM: - return UnpackHalfImmediate(instr, true); - default: - UNREACHABLE(); - return Node{}; - } - }(); - - if (!ftz) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); - - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_R: - op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); - [[fallthrough]]; - case OpCode::Id::HSET2_C: - op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); - break; - default: - break; - } - - Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - - Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); - - // HSET2 operates on each half float in the pack. - std::array values; - for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = bf ? 
0x3c00 : 0xffff; - Node true_value = Immediate(raw_value << (i * 16)); - Node false_value = Immediate(0); - - Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - Node predicate = Operation(combiner, comparison, second_pred); - values[i] = - Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); - } - - Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); - SetRegister(bb, instr.gpr0, move(value)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (instr.hsetp2.ftz != 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - - Tegra::Shader::PredCondition cond{}; - bool h_and{}; - Node op_b{}; - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_C: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = 
GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); - // F32 is hardcoded in hardware - op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); - break; - case OpCode::Id::HSETP2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = UnpackHalfImmediate(instr, true); - break; - case OpCode::Id::HSETP2_R: - cond = instr.hsetp2.reg.cond; - h_and = instr.hsetp2.reg.h_and; - op_b = - GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), - instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); - break; - default: - UNREACHABLE(); - op_b = Immediate(0); - } - - const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); - - const auto Write = [&](u64 dest, Node src) { - SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); - }; - - const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred3; - const u64 second = instr.hsetp2.pred0; - if (h_and) { - Node joined = Operation(OperationCode::LogicalAnd2, comparison); - Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); - } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfPrecision; -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); - } else { - DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); - } - - constexpr auto identity = HalfType::H0_H1; - bool neg_b{}, neg_c{}; - auto [saturate, type_b, op_b, type_c, - op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::HFMA2_CR: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, HalfType::F32, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_RC: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HFMA2_RR: - neg_b = instr.hfma2.rr.negate_b; - neg_c = instr.hfma2.rr.negate_c; - return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), - instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_IMM_R: - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - default: - return {false, identity, Immediate(0), identity, Immediate(0)}; - } - }(); - - const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), 
instr.hfma2.type_a); - op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); - op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - - Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedHalfFloat(value, saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/textures/texture.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; -using Tegra::Shader::StoreType; -using Tegra::Texture::ComponentType; -using Tegra::Texture::TextureFormat; -using Tegra::Texture::TICEntry; - -namespace { - -ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, - std::size_t component) { - const TextureFormat format{descriptor.format}; - switch (format) { - case TextureFormat::R16G16B16A16: - case TextureFormat::R32G32B32A32: - case TextureFormat::R32G32B32: - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return 
descriptor.g_type; - } - if (component == 2) { - return descriptor.b_type; - } - if (component == 3) { - return descriptor.a_type; - } - break; - case TextureFormat::A8R8G8B8: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.r_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.b_type; - } - break; - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.r_type; - } - break; - case TextureFormat::R32_B24G8: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - break; - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - if (component == 0) { - return descriptor.b_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.r_type; - } - break; - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - if (component == 0) { - return descriptor.g_type; - } - if (component == 1) { - return descriptor.r_type; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return ComponentType::FLOAT; -} - -bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), - (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return 
std::bitset<4>{mask.at(component_mask)}.test(component); -} - -u32 GetComponentSize(TextureFormat format, std::size_t component) { - switch (format) { - case TextureFormat::R32G32B32A32: - return 32; - case TextureFormat::R16G16B16A16: - return 16; - case TextureFormat::R32G32B32: - return component <= 2 ? 32 : 0; - case TextureFormat::R32G32: - return component <= 1 ? 32 : 0; - case TextureFormat::R16G16: - return component <= 1 ? 16 : 0; - case TextureFormat::R32: - return component == 0 ? 32 : 0; - case TextureFormat::R16: - return component == 0 ? 16 : 0; - case TextureFormat::R8: - return component == 0 ? 8 : 0; - case TextureFormat::R1: - return component == 0 ? 1 : 0; - case TextureFormat::A8R8G8B8: - return 8; - case TextureFormat::A2B10G10R10: - return (component == 3 || component == 2 || component == 1) ? 10 : 2; - case TextureFormat::A4B4G4R4: - return 4; - case TextureFormat::A5B5G5R1: - return (component == 0 || component == 1 || component == 2) ? 5 : 1; - case TextureFormat::A1B5G5R5: - return (component == 1 || component == 2 || component == 3) ? 5 : 1; - case TextureFormat::R32_B24G8: - if (component == 0) { - return 32; - } - if (component == 1) { - return 24; - } - if (component == 2) { - return 8; - } - return 0; - case TextureFormat::B5G6R5: - if (component == 0 || component == 2) { - return 5; - } - if (component == 1) { - return 6; - } - return 0; - case TextureFormat::B6G5R5: - if (component == 1 || component == 2) { - return 5; - } - if (component == 0) { - return 6; - } - return 0; - case TextureFormat::B10G11R11: - if (component == 1 || component == 2) { - return 11; - } - if (component == 0) { - return 10; - } - return 0; - case TextureFormat::R24G8: - if (component == 0) { - return 8; - } - if (component == 1) { - return 24; - } - return 0; - case TextureFormat::R8G24: - if (component == 0) { - return 24; - } - if (component == 1) { - return 8; - } - return 0; - case TextureFormat::R8G8: - return (component == 0 || component == 1) ? 
8 : 0; - case TextureFormat::G4R4: - return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; - } -} - -std::size_t GetImageComponentMask(TextureFormat format) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - switch (format) { - case TextureFormat::R32G32B32A32: - case TextureFormat::R16G16B16A16: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - return std::size_t{R | G | B | A}; - case TextureFormat::R32G32B32: - case TextureFormat::R32_B24G8: - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - return std::size_t{R | G | B}; - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - return std::size_t{R | G}; - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; - } -} - -std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - case Tegra::Shader::ImageType::TextureBuffer: - return 1; - case Tegra::Shader::ImageType::Texture1DArray: - case Tegra::Shader::ImageType::Texture2D: - return 2; - case Tegra::Shader::ImageType::Texture2DArray: - case Tegra::Shader::ImageType::Texture3D: - return 3; - } - UNREACHABLE(); - return 1; -} -} // Anonymous namespace - -std::pair ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, - Node original_value) { - switch (component_type) { - case ComponentType::SNORM: { - // range [-1.0, 1.0] - auto cnv_value = 
Operation(OperationCode::FMul, original_value, - Immediate(static_cast(1 << component_size) / 2.f - 1.f)); - cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); - return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; - } - case ComponentType::SINT: - case ComponentType::UNORM: { - bool is_signed = component_type == ComponentType::SINT; - // range [0.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast(1 << component_size) - 1.f)); - return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), - is_signed}; - } - case ComponentType::UINT: // range [0, (1 << component_size) - 1] - return {std::move(original_value), false}; - case ComponentType::FLOAT: - if (component_size == 16) { - return {Operation(OperationCode::HCastFloat, original_value), true}; - } else { - return {std::move(original_value), true}; - } - default: - UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); - return {std::move(original_value), true}; - } -} - -u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { - std::vector coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; - coords.reserve(num_coords); - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - return coords; - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? 
GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkRead(); - - if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; - } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { - UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && - instr.suldst.GetStoreDataLayout() != StoreType::Bits64); - - auto descriptor = [this, instr] { - std::optional sampler_descriptor; - if (instr.suldst.is_immediate) { - sampler_descriptor = - registry.ObtainBoundSampler(static_cast(instr.image.index.Value())); - } else { - const Node image_register = GetRegister(instr.gpr39); - const auto result = TrackCbuf(image_register, global_code, - static_cast(global_code.size())); - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); - } - if (!sampler_descriptor) { - UNREACHABLE_MSG("Failed to obtain image descriptor"); - } - return *sampler_descriptor; - }(); - - const auto comp_mask = GetImageComponentMask(descriptor.format); - - switch (instr.suldst.GetStoreDataLayout()) { - case StoreType::Bits32: - case StoreType::Bits64: { - u32 indexer = 0; - u32 shifted_counter = 0; - Node value = Immediate(0); - for (u32 element = 0; element < 4; ++element) { - if (!IsComponentEnabled(comp_mask, element)) { - continue; - } - const auto component_type = GetComponentType(descriptor, element); - const auto component_size = GetComponentSize(descriptor.format, element); - MetaImage meta{image, {}, element}; - - auto 
[converted_value, is_signed] = GetComponentValue( - component_type, component_size, - Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); - - // shift element to correct position - const auto shifted = shifted_counter; - if (shifted > 0) { - converted_value = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, - std::move(converted_value), Immediate(shifted)); - } - shifted_counter += component_size; - - // add value into result - value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); - - // if we shifted enough for 1 byte -> we save it into temp - if (shifted_counter >= 32) { - SetTemporary(bb, indexer++, std::move(value)); - // reset counter and value to prepare pack next byte - value = Immediate(0); - shifted_counter = 0; - } - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNREACHABLE(); - break; - } - } - break; - } - case OpCode::Id::SUST: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA - - std::vector values; - constexpr std::size_t hardcoded_size{4}; - for (std::size_t i = 0; i < hardcoded_size; ++i) { - values.push_back(GetRegister(instr.gpr0.Value() + i)); - } - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? 
GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkWrite(); - - MetaImage meta{image, std::move(values)}; - bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); - break; - } - case OpCode::Id::SUATOM: { - UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); - - const OperationCode operation_code = [instr] { - switch (instr.suatom_d.operation_type) { - case Tegra::Shader::ImageAtomicOperationType::S32: - case Tegra::Shader::ImageAtomicOperationType::U32: - switch (instr.suatom_d.operation) { - case Tegra::Shader::ImageAtomicOperation::Add: - return OperationCode::AtomicImageAdd; - case Tegra::Shader::ImageAtomicOperation::And: - return OperationCode::AtomicImageAnd; - case Tegra::Shader::ImageAtomicOperation::Or: - return OperationCode::AtomicImageOr; - case Tegra::Shader::ImageAtomicOperation::Xor: - return OperationCode::AtomicImageXor; - case Tegra::Shader::ImageAtomicOperation::Exch: - return OperationCode::AtomicImageExchange; - default: - break; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", - static_cast(instr.suatom_d.operation.Value()), - static_cast(instr.suatom_d.operation_type.Value())); - return OperationCode::AtomicImageAdd; - }(); - - Node value = GetRegister(instr.gpr0); - - const auto type = instr.suatom_d.image_type; - auto& image = GetImage(instr.image, type); - image.MarkAtomic(); - - MetaImage meta{image, {std::move(value)}}; - SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset = static_cast(image.index.Value()); - - const auto it = - std::find_if(std::begin(used_images), std::end(used_images), - [offset](const ImageEntry& entry) { return entry.offset == offset; }); - if (it != 
std::end(used_images)) { - ASSERT(!it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast(used_images.size()); - return used_images.emplace_back(next_index, offset, type); -} - -ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { - const Node image_register = GetRegister(reg); - const auto result = - TrackCbuf(image_register, global_code, static_cast(global_code.size())); - - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const ImageEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != std::end(used_images)) { - ASSERT(it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast(used_images.size()); - return used_images.emplace_back(next_index, offset, buffer, type); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition - // is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); - const Node first_pred = - GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.iset.op); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // We can't use the constant predicate as destination. - ASSERT(instr.isetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); - const Node predicate = - GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); - const Node value = Operation(combiner, predicate, second_pred); - SetPredicate(bb, instr.isetp.pred3, value); - - if (instr.isetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// 
Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::AtomicOp; -using Tegra::Shader::AtomicType; -using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::StoreType; - -namespace { - -OperationCode GetAtomOperation(AtomicOp op) { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", op); - return OperationCode::AtomicIAdd; - } -} - -bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { - return uniform_type == Tegra::Shader::UniformType::UnsignedByte || - uniform_type == Tegra::Shader::UniformType::UnsignedShort; -} - -u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 0b11; - case Tegra::Shader::UniformType::UnsignedShort: - return 0b10; - default: - UNREACHABLE(); - return 0; - } -} - -u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 8; - case Tegra::Shader::UniformType::UnsignedShort: - return 16; - case 
Tegra::Shader::UniformType::Single: - return 32; - case Tegra::Shader::UniformType::Double: - return 64; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 128; - default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); - return 32; - } -} - -Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); -} - -Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), - Immediate(size)); -} - -Node Sign16Extend(Node value) { - Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); - Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::LD_A: { - // Note: Shouldn't this be interp mode flat? As in no interpolation made. 
- UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && - instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, - "Non-32 bits PHYS reads are not implemented"); - - const Node buffer{GetRegister(instr.gpr39)}; - - u64 next_element = instr.attribute.fmt20.element; - auto next_index = static_cast(instr.attribute.fmt20.index.Value()); - - const auto LoadNextElement = [&](u32 reg_offset) { - const Node attribute{instr.attribute.fmt20.IsPhysical() - ? GetPhysicalInputAttribute(instr.gpr8, buffer) - : GetInputAttribute(static_cast(next_index), - next_element, buffer)}; - - SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); - - // Load the next attribute element into the following register. If the element - // to load goes beyond the vec4 size, load the first element of the next - // attribute. - next_element = (next_element + 1) % 4; - next_index = next_index + (next_element == 0 ? 
1 : 0); - }; - - const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - LoadNextElement(reg_offset); - } - break; - } - case OpCode::Id::LD_C: { - UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - - Node index = GetRegister(instr.gpr8); - - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - - switch (instr.ld_c.type.Value()) { - case Tegra::Shader::UniformType::Single: - SetRegister(bb, instr.gpr0, op_a); - break; - - case Tegra::Shader::UniformType::Double: { - const Node op_b = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); - - SetTemporary(bb, 0, op_a); - SetTemporary(bb, 1, op_b); - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); - } - break; - } - case OpCode::Id::LD_L: - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); - [[fallthrough]]; - case OpCode::Id::LD_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate_offset = Immediate(static_cast(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); - }; - const auto GetMemory = [&](s32 offset) { - return opcode->get().GetId() == OpCode::Id::LD_S ? 
GetSharedMemory(GetAddress(offset)) - : GetLocalMemory(GetAddress(offset)); - }; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Signed16: - SetRegister(bb, instr.gpr0, - Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); - break; - case StoreType::Bits32: - case StoreType::Bits64: - case StoreType::Bits128: { - const u32 count = [&] { - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits32: - return 1; - case StoreType::Bits64: - return 2; - case StoreType::Bits128: - return 4; - default: - UNREACHABLE(); - return 0; - } - }(); - for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i, GetMemory(i * 4)); - } - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::LD: - case OpCode::Id::LDG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::LD: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); - return instr.generic.type; - case OpCode::Id::LDG: - return instr.ldg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, false); - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - if (!real_address_base || !base_address) { - // Tracking failed, load zeroes. 
- for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); - } - break; - } - - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - Node gmem = MakeNode(real_address, base_address, descriptor); - - // To handle unaligned loads get the bytes used to dereference global memory and extract - // those bytes from the loaded u32. - if (IsUnaligned(type)) { - gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); - } - - SetTemporary(bb, i, gmem); - } - - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::ST_A: { - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - - u64 element = instr.attribute.fmt20.element; - auto index = static_cast(instr.attribute.fmt20.index.Value()); - - const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - Node dest; - if (instr.attribute.fmt20.patch) { - const u32 offset = static_cast(index) * 4 + static_cast(element); - dest = MakeNode(offset); - } else { - dest = GetOutputAttribute(static_cast(index), element, - GetRegister(instr.gpr39)); - } - const auto src = GetRegister(instr.gpr0.Value() + reg_offset); - - bb.push_back(Operation(OperationCode::Assign, dest, src)); - - // Load the next attribute element into the following register. If the element to load - // goes beyond the vec4 size, load the first element of the next attribute. - element = (element + 1) % 4; - index = index + (element == 0 ? 
1 : 0); - } - break; - } - case OpCode::Id::ST_L: - LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); - [[fallthrough]]; - case OpCode::Id::ST_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate = Immediate(static_cast(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); - }; - - const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; - const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; - const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits128: - (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); - (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); - [[fallthrough]]; - case StoreType::Bits64: - (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); - [[fallthrough]]; - case StoreType::Bits32: - (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); - break; - case StoreType::Unsigned16: - case StoreType::Signed16: { - Node address = GetAddress(0); - Node memory = (this->*get_memory)(address); - (this->*set_memory)( - bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); - break; - } - default: - UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::ST: - case OpCode::Id::STG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::ST: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); - return instr.generic.type; - case OpCode::Id::STG: - return instr.stg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - // For unaligned reads we have to read memory too. 
- const bool is_read = IsUnaligned(type); - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, is_read, true); - if (!real_address_base || !base_address) { - // Tracking failed, skip the store. - break; - } - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - const Node gmem = MakeNode(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0.Value() + i); - - if (IsUnaligned(type)) { - const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, move(value), real_address, mask, size); - } - - bb.push_back(Operation(OperationCode::Assign, gmem, value)); - } - break; - } - case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - instr.red.type.Value()); - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. 
- break; - } - Node gmem = MakeNode(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); - break; - } - case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || - instr.atom.operation == AtomicOp::Dec || - instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", instr.atom.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64 || - instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || - instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", instr.atom.type.Value()); - - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - - const bool is_signed = - instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; - Node gmem = MakeNode(real_address, base_address, descriptor); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || - instr.atoms.operation == AtomicOp::Dec, - "operation={}", instr.atoms.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || - instr.atoms.type == AtomicType::U64, - "type={}", instr.atoms.type.Value()); - const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; - const s32 offset = instr.atoms.GetImmediateOffset(); - Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, - GetSharedMemory(move(address)), GetRegister(instr.gpr20))); - break; 
- } - case OpCode::Id::AL2P: { - // Ignore al2p.direction since we don't care about it. - - // Calculate emulation fake physical address. - const Node fixed_address{Immediate(static_cast(instr.al2p.address))}; - const Node reg{GetRegister(instr.gpr8)}; - const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; - - // Set the fake address to target register. - SetRegister(bb, instr.gpr0, fake_address); - - // Signal the shader IR to declare all possible attributes and varyings - uses_physical_attributes = true; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -std::tuple ShaderIR::TrackGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_read, bool is_write) { - const auto addr_register{GetRegister(instr.gmem.gpr)}; - const auto immediate_offset{static_cast(instr.gmem.offset)}; - - const auto [base_address, index, offset] = - TrackCbuf(addr_register, global_code, static_cast(global_code.size())); - ASSERT_OR_EXECUTE_MSG( - base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); - - bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); - - const GlobalMemoryBase descriptor{index, offset}; - const auto& entry = used_global_memory.try_emplace(descriptor).first; - auto& usage = entry->second; - usage.is_written |= is_write; - usage.is_read |= is_read; - - const auto real_address = - Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); - - return {real_address, base_address, descriptor}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or 
any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::OpCode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::SystemVariable; - -using Index = Tegra::Shader::Attribute::Index; - -u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::NOP: { - UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); - UNIMPLEMENTED_IF(instr.nop.trigger != 0); - // With the previous preconditions, this instruction is a no-operation. - break; - } - case OpCode::Id::EXIT: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); - - switch (instr.flow.cond) { - case Tegra::Shader::FlowCondition::Always: - bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, - // otherwise we have to account for the possibility of the condition - // not being met, so continue processing the next instruction. - pc = MAX_PROGRAM_LENGTH - 1; - } - break; - - case Tegra::Shader::FlowCondition::Fcsm_Tr: - // TODO(bunnei): What is this used for? 
If we assume this conditon is not - // satisifed, dual vertex shaders in Farming Simulator make more sense - UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); - break; - - default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); - } - break; - } - case OpCode::Id::KIL: { - UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); - - bb.push_back(Operation(OperationCode::Discard)); - break; - } - case OpCode::Id::S2R: { - const Node value = [this, instr] { - switch (instr.sys20) { - case SystemVariable::LaneId: - return Operation(OperationCode::ThreadId); - case SystemVariable::InvocationId: - return Operation(OperationCode::InvocationId); - case SystemVariable::Ydirection: - uses_y_negate = true; - return Operation(OperationCode::YNegate); - case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0x00ff'0000U); - case SystemVariable::WscaleFactorXY: - UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); - return Immediate(0U); - case SystemVariable::WscaleFactorZ: - UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); - return Immediate(0U); - case SystemVariable::Tid: { - Node val = Immediate(0); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return val; - } - case SystemVariable::TidX: - return Operation(OperationCode::LocalInvocationIdX); - case SystemVariable::TidY: - return Operation(OperationCode::LocalInvocationIdY); - case SystemVariable::TidZ: - return Operation(OperationCode::LocalInvocationIdZ); - case SystemVariable::CtaIdX: - return Operation(OperationCode::WorkGroupIdX); - 
case SystemVariable::CtaIdY: - return Operation(OperationCode::WorkGroupIdY); - case SystemVariable::CtaIdZ: - return Operation(OperationCode::WorkGroupIdZ); - case SystemVariable::EqMask: - case SystemVariable::LtMask: - case SystemVariable::LeMask: - case SystemVariable::GtMask: - case SystemVariable::GeMask: - uses_warps = true; - switch (instr.sys20) { - case SystemVariable::EqMask: - return Operation(OperationCode::ThreadEqMask); - case SystemVariable::LtMask: - return Operation(OperationCode::ThreadLtMask); - case SystemVariable::LeMask: - return Operation(OperationCode::ThreadLeMask); - case SystemVariable::GtMask: - return Operation(OperationCode::ThreadGtMask); - case SystemVariable::GeMask: - return Operation(OperationCode::ThreadGeMask); - default: - UNREACHABLE(); - return Immediate(0u); - } - default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); - return Immediate(0u); - } - }(); - SetRegister(bb, instr.gpr0, value); - - break; - } - case OpCode::Id::BRA: { - Node branch; - if (instr.bra.constant_buffer == 0) { - const u32 target = pc + instr.bra.GetBranchTarget(); - branch = Operation(OperationCode::Branch, Immediate(target)); - } else { - const u32 target = pc + 1; - const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - const Node operand = - Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - branch = Operation(OperationCode::BranchIndirect, operand); - } - - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::BRX: { - Node operand; - if (instr.brx.constant_buffer != 0) { - const s32 target = pc + 1; - const Node index = GetRegister(instr.gpr8); - const Node op_a = - 
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } else { - const s32 target = pc + instr.brx.GetBranchExtend(); - const Node op_a = GetRegister(instr.gpr8); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } - const Node branch = Operation(OperationCode::BranchIndirect, operand); - - const ConditionCode cc = instr.flow_condition_code; - if (cc != ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::SSY: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer flow is not supported"); - - if (disable_flow_stack) { - break; - } - - // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); - break; - } - case OpCode::Id::PBK: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer PBK is not supported"); - - if (disable_flow_stack) { - break; - } - - // PBK pushes to a stack the address where BRK will jump to. 
- const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); - break; - } - case OpCode::Id::SYNC: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); - - if (decompiled) { - break; - } - - // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); - break; - } - case OpCode::Id::BRK: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); - if (decompiled) { - break; - } - - // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); - break; - } - case OpCode::Id::IPA: { - const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Index index = attribute.index; - - Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(index, attribute.element); - - // Code taken from Ryujinx. 
- if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { - const u32 location = static_cast(index) - static_cast(Index::Attribute_0); - if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { - Node position_w = GetInputAttribute(Index::Position, 3); - value = Operation(OperationCode::FMul, move(value), move(position_w)); - } - } - - if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); - } - - value = GetSaturatedFloat(move(value), instr.ipa.saturate); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::OUT_R: { - UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, - "Stream buffer is not supported"); - - if (instr.out.emit) { - // gpr0 is used to store the next address and gpr8 contains the address to emit. - // Hardware uses pointers here but we just ignore it - bb.push_back(Operation(OperationCode::EmitVertex)); - SetRegister(bb, instr.gpr0, Immediate(0)); - } - if (instr.out.cut) { - bb.push_back(Operation(OperationCode::EndPrimitive)); - } - break; - } - case OpCode::Id::ISBERD: { - UNIMPLEMENTED_IF(instr.isberd.o != 0); - UNIMPLEMENTED_IF(instr.isberd.skew != 0); - UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); - UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); - LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); - SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); - break; - } - case OpCode::Id::BAR: { - UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); - bb.push_back(Operation(OperationCode::Barrier)); - break; - } - case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - const OperationCode type = [instr] { - switch (instr.membar.type) { - case Tegra::Shader::MembarType::CTA: - return OperationCode::MemoryBarrierGroup; - case Tegra::Shader::MembarType::GL: - return 
OperationCode::MemoryBarrierGlobal; - default: - UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); - return OperationCode::MemoryBarrierGlobal; - } - }(); - bb.push_back(Operation(type)); - break; - } - case OpCode::Id::DEPBAR: { - LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::PSETP: { - const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - - // We can't use the constant predicate as destination. 
- ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); - const Node predicate = Operation(combiner, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); - - if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled - SetPredicate(bb, instr.psetp.pred0, - Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), - second_pred)); - } - break; - } - case OpCode::Id::CSETP: { - const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); - const Node condition_code = GetConditionCode(instr.csetp.cc); - - const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); - - if (instr.csetp.pred3 != static_cast(Pred::UnusedIndex)) { - SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); - } - if (instr.csetp.pred0 != static_cast(Pred::UnusedIndex)) { - const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); - SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); - - const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); - const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); - - const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.pset.op); - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); - const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.pset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { -constexpr u64 NUM_CONDITION_CODES = 4; -constexpr u64 NUM_PREDICATES = 7; -} // namespace - -u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node apply_mask = [this, opcode, instr] { - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: - case OpCode::Id::P2R_IMM: - return Immediate(static_cast(instr.p2r_r2p.immediate_mask)); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; - - const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; - const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; - const auto get_entry = [this, cc](u64 entry) { - return cc ? 
GetInternalFlag(static_cast(entry)) : GetPredicate(entry); - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: { - Node mask = GetRegister(instr.gpr8); - - for (u64 entry = 0; entry < num_entries; ++entry) { - const u32 shift = static_cast(entry); - - Node apply = BitfieldExtract(apply_mask, shift, 1); - Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - - Node compare = BitfieldExtract(mask, offset + shift, 1); - Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - - Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); - bb.push_back(Conditional(condition, {move(code)})); - } - break; - } - case OpCode::Id::P2R_IMM: { - Node value = Immediate(0); - for (u64 entry = 0; entry < num_entries; ++entry) { - Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), - Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); - } - value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::ShfType; -using Tegra::Shader::ShfXmode; - -namespace { - -Node IsFull(Node shift) { - return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); -} - -Node Shift(OperationCode opcode, Node value, Node shift) { - Node shifted = Operation(opcode, move(value), shift); - return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); -} - -Node ClampShift(Node shift, s32 size = 32) { - shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); - return Operation(OperationCode::IMin, move(shift), Immediate(size)); -} - -Node WrapShift(Node shift, s32 size = 32) { - return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); -} - -Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); - Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); - } - - // And these when it's larger than or 32 - const bool is_signed = type == ShfType::S64; - const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(opcode, high, move(reduced)); - - Node is_less = 
Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); - Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); - } - - // And these when it's larger than or 32 - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [this, instr] { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (const auto opid = 
opcode->get().GetId(); opid) { - case OpCode::Id::SHR_C: - case OpCode::Id::SHR_R: - case OpCode::Id::SHR_IMM: { - op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); - - Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - move(op_a), move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHL_C: - case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: { - Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHF_RIGHT_R: - case OpCode::Id::SHF_RIGHT_IMM: - case OpCode::Id::SHF_LEFT_R: - case OpCode::Id::SHF_LEFT_IMM: { - UNIMPLEMENTED_IF(instr.generates_cc); - UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - instr.shf.xmode.Value()); - - if (instr.is_b_imm) { - op_b = Immediate(static_cast(instr.shf.immediate)); - } - const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; - Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); - - Node negated_shift = Operation(OperationCode::INegate, shift); - Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); - - const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; - Node value = (is_right ? 
ShiftRight : ShiftLeft)( - move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null @@ -1,935 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); - return 0; - } -} - -u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - bool is_bindless = false; - switch (opcode->get().GetId()) { - case OpCode::Id::TEX: { - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool is_aoffi = 
instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); - break; - } - case OpCode::Id::TEX_B: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const TextureType texture_type{instr.tex_b.texture_type}; - const bool is_array = instr.tex_b.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat(bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, - is_array, is_aoffi, {instr.gpr20})); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.texs.GetTextureProcessMode(); - - const Node4 components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4_B: { - is_bindless = true; - [[fallthrough]]; - } - case OpCode::Id::TLD4: { - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) - : instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = is_bindless ? 
instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) - : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) - : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, - is_ptp, is_bindless)); - break; - } - case OpCode::Id::TLD4S: { - constexpr std::size_t num_coords = 2; - const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - std::vector coords; - std::vector aoffi; - Node depth_compare; - if (is_depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - if (is_aoffi) { - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - depth_compare = GetRegister(instr.gpr20.Value() + 1); - } else { - depth_compare = op_b; - } - } else { - // There's no depth compare - coords.push_back(op_a); - if (is_aoffi) { - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - } else { - coords.push_back(op_b); - } - } - const Node component = Immediate(static_cast(instr.tld4s.component)); - - SamplerInfo info; - info.is_shadow = is_depth_compare; - const std::optional sampler = GetSampler(instr.sampler, info); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, - {}, {}, component, element, {}}; - values[element] = Operation(OperationCode::TextureGather, meta, coords); - } - - if (instr.tld4s.fp16_flag) { - WriteTexsInstructionHalfFloat(bb, instr, values, true); - 
} else { - WriteTexsInstructionFloat(bb, instr, values, true); - } - break; - } - case OpCode::Id::TXD_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXD: { - UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const bool is_array = instr.txd.is_array != 0; - const auto derivate_reg = instr.gpr20.Value(); - const auto texture_type = instr.txd.texture_type.Value(); - const auto coord_count = GetCoordCount(texture_type); - u64 base_reg = instr.gpr8.Value(); - Node index_var; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - const std::optional sampler = - is_bindless ? GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); - WriteTexInstructionFloat(bb, instr, values); - break; - } - - if (is_bindless) { - base_reg++; - } - - std::vector coords; - std::vector derivates; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(base_reg + i)); - const std::size_t derivate = i * 2; - derivates.push_back(GetRegister(derivate_reg + derivate)); - derivates.push_back(GetRegister(derivate_reg + derivate + 1)); - } - - Node array_node = {}; - if (is_array) { - const Node info_reg = GetRegister(base_reg + coord_count); - array_node = BitfieldExtract(info_reg, 0, 16); - } - - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, - {}, {}, {}, element, index_var}; - values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); - } - - WriteTexInstructionFloat(bb, instr, values); - - break; - } - case OpCode::Id::TXQ_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXQ: { - Node index_var; - const std::optional sampler = - is_bindless ? 
GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, - GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); - } - break; - } - case OpCode::Id::TMML_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - const auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - Node index_var; - const std::optional sampler = - is_bindless ? 
GetBindlessSampler(instr.gpr20, info, index_var) - : GetSampler(instr.sampler, info); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - const u64 base_index = is_array ? 1 : 0; - const u64 num_components = [texture_type] { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); - return 2; - } - }(); - // TODO: What's the array component used for? - - std::vector coords; - coords.reserve(num_components); - for (u64 component = 0; component < num_components; ++component) { - coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); - } - - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - Node value = Operation(OperationCode::TextureQueryLod, meta, coords); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::TLD: { - UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); - break; - } - case OpCode::Id::TLDS: { - const TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - 
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - const Node4 components = GetTldsCode(instr, texture_type, is_array); - - if (instr.tlds.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( - SamplerInfo info, std::optional sampler) { - if (info.IsComplete()) { - return info; - } - if (!sampler) { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); - info.is_array = info.is_array.value_or(false); - info.is_shadow = info.is_shadow.value_or(false); - info.is_buffer = info.is_buffer.value_or(false); - return info; - } - info.type = info.type.value_or(sampler->texture_type); - info.is_array = info.is_array.value_or(sampler->is_array != 0); - info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); - info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); - return info; -} - -std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { - const u32 offset = static_cast(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const SamplerEntry& entry) { return entry.offset == offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); -} - -std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, - SamplerInfo info, Node& index_var) { - const Node sampler_register = GetRegister(reg); - const auto [base_node, tracked_sampler_info] = - TrackBindlessSampler(sampler_register, global_code, static_cast(global_code.size())); - if (!base_node) { - UNREACHABLE(); - return std::nullopt; - } - - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 buffer = sampler_info->index; - const u32 offset = sampler_info->offset; - info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const SamplerEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); - } - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const std::pair indices = sampler_info->indices; - const std::pair offsets = sampler_info->offsets; - info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); - - // Try to use an already created sampler if it exists - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [indices, offsets](const SamplerEntry& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const u32 next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer); - } - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 base_offset = sampler_info->base_offset / 4; - index_var = GetCustomVariable(sampler_info->bindless_var); - info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), - [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && - it->is_indexed); - return *it; - } - - uses_indexed_samplers = true; - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, true); - } - return std::nullopt; -} - -void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporary(bb, dest_elem++, components[elem]); - } - // After writing values in temporals, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, - bool ignore_mask) { - // TEXS has two destination registers and a swizzle. 
The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - SetTemporary(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components, bool ignore_mask) { - // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half - // float instruction). - - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporary(bb, 0, first_value); - SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr28, GetTemporary(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, - std::vector aoffi, - std::optional bindless_reg) { - const bool is_array = array != nullptr; - const bool is_shadow = 
depth_compare != nullptr; - const bool is_bindless = bindless_reg.has_value(); - - ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, - "Illegal texture type"); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = is_shadow; - info.is_buffer = false; - - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); - if (!sampler) { - return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; - } - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; - - Node bias; - Node lod; - switch (process_mode) { - case TextureProcessMode::None: - break; - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 field with - // an offset depending on the usage of the other registers. 
- bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); - break; - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, - lod, {}, element, index_var}; - values[element] = Operation(opcode, meta, coords); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, std::optional bindless_reg) { - const bool lod_bias_enabled{ - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; - - const bool is_bindless = bindless_reg.has_value(); - - u64 parameter_register = instr.gpr20.Value(); - if (is_bindless) { - ++parameter_register; - } - - const u32 bias_lod_offset = (is_bindless ? 1 : 0); - if (lod_bias_enabled) { - ++parameter_register; - } - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 5); - const auto coord_count = std::get<0>(coord_counts); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in OpenGL the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - - const Node array = is_array ? 
GetRegister(array_register) : nullptr; - - std::vector aoffi; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); - } - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - dc = GetRegister(parameter_register++); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, - aoffi, bindless_reg); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 4); - const auto coord_count = std::get<0>(coord_counts); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast(instr.gpr20.Value()) - : coord_register + 1; - const u32 bias_offset = coord_count > 2 ? 1 : 0; - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 
1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, - {}); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { - ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); - - const std::size_t coord_count = GetCoordCount(texture_type); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - - u64 parameter_register = instr.gpr20.Value(); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = depth_compare; - - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; - } - - std::vector aoffi, ptp; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); - } else if (is_ptp) { - ptp = GetPtpCoordinates( - {GetRegister(parameter_register++), GetRegister(parameter_register++)}); - } - - Node dc; - if (depth_compare) { - dc = GetRegister(parameter_register++); - } - - const Node component = is_bindless ? 
Immediate(static_cast(instr.tld4_b.component)) - : Immediate(static_cast(instr.tld4.component)); - - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, - index_var}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { - const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array != 0}; - const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; - const std::size_t coord_count{GetCoordCount(texture_type)}; - - u64 gpr8_cursor{instr.gpr8.Value()}; - const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; - - std::vector coords; - coords.reserve(coord_count); - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(gpr8_cursor++)); - } - - u64 gpr20_cursor{instr.gpr20.Value()}; - // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; - const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; - // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; - // const Node multisample{is_multisample ? 
GetRegister(gpr20_cursor++) : nullptr}; - - const std::optional sampler = GetSampler(instr.sampler, {}); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = false; - const std::optional sampler = GetSampler(instr.sampler, info); - - const std::size_t type_coord_count = GetCoordCount(texture_type); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); - - const u64 last_coord_register = - ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array - ? static_cast(instr.gpr20.Value()) - : coord_register + 1; - - std::vector coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { - const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back( - GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - // When lod is used always is in gpr20 - const Node lod = lod_enabled ? 
GetRegister(instr.gpr20) : Immediate(0); - - std::vector aoffi; - if (aoffi_enabled) { - aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - return values; -} - -std::tuple ShaderIR::ValidateAndGetCoordinateElement( - TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, - std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); - - std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); - if (total_coord_count > max_coords || total_reg_count > max_inputs) { - UNIMPLEMENTED_MSG("Unsupported Texture operation"); - total_coord_count = std::min(total_coord_count, max_coords); - } - // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. - total_coord_count += - (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; - - return {coord_count, total_coord_count}; -} - -std::vector ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, - bool is_tld4) { - const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; - const u32 size = is_tld4 ? 6 : 4; - const s32 wrap_value = is_tld4 ? 32 : 8; - const s32 diff_value = is_tld4 ? 64 : 16; - const u32 mask = (1U << size) - 1; - - std::vector aoffi; - aoffi.reserve(coord_count); - - const auto aoffi_immediate{ - TrackImmediate(aoffi_reg, global_code, static_cast(global_code.size()))}; - if (!aoffi_immediate) { - // Variable access, not supported on AMD. 
- LOG_WARNING(HW_GPU, - "AOFFI constant folding failed, some hardware might have graphical issues"); - for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); - aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return aoffi; - } - - for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; - if (value >= wrap_value) { - value -= diff_value; - } - aoffi.push_back(Immediate(value)); - } - return aoffi; -} - -std::vector ShaderIR::GetPtpCoordinates(std::array ptp_regs) { - static constexpr u32 num_entries = 8; - - std::vector ptp; - ptp.reserve(num_entries); - - const auto global_size = static_cast(global_code.size()); - const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); - const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); - if (!low || !high) { - for (u32 entry = 0; entry < num_entries; ++entry) { - const u32 reg = entry / 4; - const u32 offset = entry % 4; - const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); - ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return ptp; - } - - const u64 immediate = (static_cast(*high) << 32) | static_cast(*low); - for (u32 entry = 0; entry < num_entries; ++entry) { - s32 value = (immediate >> (entry * 8)) & 0b111111; - if (value >= 32) { - value -= 64; - } - ptp.push_back(Immediate(value)); - } - - return ptp; -} - -} // namespace VideoCommon::Shader diff --git 
a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::VideoType; -using Tegra::Shader::VmadShr; -using Tegra::Shader::VmnmxOperation; -using Tegra::Shader::VmnmxType; - -u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::VMNMX) { - DecodeVMNMX(bb, instr); - return pc; - } - - const Node op_a = - GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, - instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [this, instr] { - if (instr.video.use_register_b) { - return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, - instr.video.signed_b, instr.video.type_b, - instr.video.byte_height_b); - } - if (instr.video.signed_b) { - const auto imm = static_cast(instr.alu.GetImm20_16()); - return Immediate(static_cast(imm)); - } else { - return Immediate(instr.alu.GetImm20_16()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::VMAD: { - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node op_c = GetRegister(instr.gpr39); - - Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); - value = SignedOperation(OperationCode::IAdd, 
result_signed, NO_PRECISE, value, op_c); - - if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { - const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); - value = - SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::VSETP: { - // We can't use the constant predicate as destination. - ASSERT(instr.vsetp.pred3 != static_cast(Pred::UnusedIndex)); - - const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); - const Node second_pred = GetPredicate(instr.vsetp.pred39, false); - - const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); - - if (instr.vsetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, - u64 byte_height) { - if (!is_chunk) { - return BitfieldExtract(op, static_cast(byte_height * 8), 8); - } - - switch (type) { - case VideoType::Size16_Low: - return BitfieldExtract(op, 0, 16); - case VideoType::Size16_High: - return BitfieldExtract(op, 16, 16); - case VideoType::Size32: - // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used - // (1 * 1 + 0 == 0x5b800000). 
Until a better explanation is found: abort. - UNIMPLEMENTED(); - return Immediate(0); - case VideoType::Invalid: - UNREACHABLE_MSG("Invalid instruction encoding"); - return Immediate(0); - default: - UNREACHABLE(); - return Immediate(0); - } -} - -void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { - UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); - UNIMPLEMENTED_IF(instr.vmnmx.sat); - UNIMPLEMENTED_IF(instr.generates_cc); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node op_c = GetRegister(instr.gpr39); - - const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed - const bool is_oper2_signed = instr.vmnmx.is_dest_signed; - - const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; - Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); - - switch (instr.vmnmx.operation) { - case VmnmxOperation::Mrg_16H: - value = BitfieldInsert(move(op_c), move(value), 16, 16); - break; - case VmnmxOperation::Mrg_16L: - value = BitfieldInsert(move(op_c), move(value), 0, 16); - break; - case VmnmxOperation::Mrg_8B0: - value = BitfieldInsert(move(op_c), move(value), 0, 8); - break; - case VmnmxOperation::Mrg_8B2: - value = BitfieldInsert(move(op_c), move(value), 16, 8); - break; - case VmnmxOperation::Acc: - value = Operation(OperationCode::IAdd, move(value), move(op_c)); - break; - case VmnmxOperation::Min: - value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Max: - value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Nop: - break; - default: - UNREACHABLE(); - break; - } - - SetRegister(bb, 
instr.gpr0, move(value)); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::ShuffleOperation; -using Tegra::Shader::VoteOperation; - -namespace { - -OperationCode GetOperationCode(VoteOperation vote_op) { - switch (vote_op) { - case VoteOperation::All: - return OperationCode::VoteAll; - case VoteOperation::Any: - return OperationCode::VoteAny; - case VoteOperation::Eq: - return OperationCode::VoteEqual; - default: - UNREACHABLE_MSG("Invalid vote operation={}", vote_op); - return OperationCode::VoteAll; - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - // Signal the backend that this shader uses warp instructions. - uses_warps = true; - - switch (opcode->get().GetId()) { - case OpCode::Id::VOTE: { - const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); - const Node active = Operation(OperationCode::BallotThread, value); - const Node vote = Operation(GetOperationCode(instr.vote.operation), value); - SetRegister(bb, instr.gpr0, active); - SetPredicate(bb, instr.vote.dest_pred, vote); - break; - } - case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? 
Immediate(static_cast(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node index = instr.shfl.is_index_imm ? Immediate(static_cast(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - - Node thread_id = Operation(OperationCode::ThreadId); - Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); - Node seg_mask = BitfieldExtract(mask, 8, 16); - - Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); - Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); - Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, - Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); - - Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { - switch (instr.shfl.operation) { - case ShuffleOperation::Idx: - return Operation(OperationCode::IBitwiseOr, - Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), - min_thread_id); - case ShuffleOperation::Down: - return Operation(OperationCode::IAdd, thread_id, index); - case ShuffleOperation::Up: - return Operation(OperationCode::IAdd, thread_id, - Operation(OperationCode::INegate, index)); - case ShuffleOperation::Bfly: - return Operation(OperationCode::IBitwiseXor, thread_id, index); - } - UNREACHABLE(); - return Immediate(0U); - }(); - - Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); - } else { - return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); - } - }(); - - SetPredicate(bb, instr.shfl.pred48, in_bounds); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); - break; - } - case OpCode::Id::FSWZADD: { - UNIMPLEMENTED_IF(instr.fswzadd.ndv); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node mask = 
Immediate(static_cast(instr.fswzadd.swizzle)); - SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF(instr.xmad.sign_a); - UNIMPLEMENTED_IF(instr.xmad.sign_b); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in XMAD is not implemented"); - - Node op_a = GetRegister(instr.gpr8); - - // TODO(bunnei): Needs to be fixed once op_a or op_b is signed - UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); - const bool is_signed_a = instr.xmad.sign_a == 1; - const bool is_signed_b = instr.xmad.sign_b == 1; - const bool is_signed_c = is_signed_a; - - auto [is_merge, is_psl, is_high_b, mode, op_b_binding, - op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::XMAD_CR: - return {instr.xmad.merge_56, - instr.xmad.product_shift_left_second, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - 
case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, - instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RC: - return {false, - false, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, - instr.xmad.product_shift_left, - false, - instr.xmad.mode, - Immediate(static_cast(instr.xmad.imm20_16)), - GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; - } - }(); - - op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), - instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); - - const Node original_b = op_b_binding; - const Node op_b = - SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), - is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); - - // we already check sign_a and sign_b is difference or not before so just use one in here. 
- Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); - if (is_psl) { - product = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); - } - SetTemporary(bb, 0, product); - product = GetTemporary(0); - - Node original_c = op_c; - const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&] { - switch (set_mode) { - case Tegra::Shader::XmadMode::None: - return original_c; - case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(std::move(original_c), 0, 16); - case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(std::move(original_c), 16, 16); - case Tegra::Shader::XmadMode::CBcc: { - Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), - std::move(shifted_b)); - } - case Tegra::Shader::XmadMode::CSfu: { - const Node comp_a = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); - const Node comp_b = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); - const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); - - const Node comp_minus_a = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_a, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, - Immediate(0x80000000)), - Immediate(0)); - const Node comp_minus_b = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_b, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, - Immediate(0x80000000)), - Immediate(0)); - - Node new_c = Operation( - OperationCode::Select, comp_minus_a, - SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), - original_c); - new_c = Operation( - OperationCode::Select, comp_minus_b, - SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), - std::move(new_c)); - - return 
Operation(OperationCode::Select, comp, original_c, std::move(new_c)); - } - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - SetTemporary(bb, 1, op_c); - op_c = GetTemporary(1); - - // TODO(Rodrigo): Use an appropiate sign for this operation - Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); - SetTemporary(bb, 2, sum); - sum = GetTemporary(2); - if (is_merge) { - const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), - Immediate(0), Immediate(16)); - const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, - Immediate(16)); - sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); - } - - SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(sum)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include - -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { -namespace { -bool ExprIsBoolean(const Expr& expr) { - return std::holds_alternative(*expr); -} - -bool ExprBooleanGet(const Expr& expr) { - return std::get_if(expr.get())->value; -} -} // Anonymous namespace - -bool ExprAnd::operator==(const ExprAnd& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprAnd::operator!=(const ExprAnd& b) const { - return !operator==(b); -} - -bool ExprOr::operator==(const ExprOr& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprOr::operator!=(const ExprOr& b) const { - return !operator==(b); -} - -bool ExprNot::operator==(const ExprNot& b) const { - return *operand1 == *b.operand1; -} - -bool ExprNot::operator!=(const ExprNot& b) const { - return !operator==(b); -} - -Expr MakeExprNot(Expr first) { - if (std::holds_alternative(*first)) { - return std::get_if(first.get())->operand1; - } - return MakeExpr(std::move(first)); -} - -Expr MakeExprAnd(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? second : first; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? first : second; - } - return MakeExpr(std::move(first), std::move(second)); -} - -Expr MakeExprOr(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? first : second; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? 
second : first; - } - return MakeExpr(std::move(first), std::move(second)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second) { - return (*first) == (*second); -} - -bool ExprAreOpposite(const Expr& first, const Expr& second) { - if (std::holds_alternative(*first)) { - return ExprAreEqual(std::get_if(first.get())->operand1, second); - } - if (std::holds_alternative(*second)) { - return ExprAreEqual(std::get_if(second.get())->operand1, first); - } - return false; -} - -bool ExprIsTrue(const Expr& first) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first); - } - return false; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -class ExprAnd; -class ExprBoolean; -class ExprCondCode; -class ExprGprEqual; -class ExprNot; -class ExprOr; -class ExprPredicate; -class ExprVar; - -using ExprData = std::variant; -using Expr = std::shared_ptr; - -class ExprAnd final { -public: - explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprAnd& b) const; - bool operator!=(const ExprAnd& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprOr final { -public: - explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprOr& b) const; - bool operator!=(const ExprOr& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprNot final { -public: - explicit ExprNot(Expr a) : operand1{std::move(a)} {} - - bool operator==(const ExprNot& b) 
const; - bool operator!=(const ExprNot& b) const; - - Expr operand1; -}; - -class ExprVar final { -public: - explicit ExprVar(u32 index) : var_index{index} {} - - bool operator==(const ExprVar& b) const { - return var_index == b.var_index; - } - - bool operator!=(const ExprVar& b) const { - return !operator==(b); - } - - u32 var_index; -}; - -class ExprPredicate final { -public: - explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} - - bool operator==(const ExprPredicate& b) const { - return predicate == b.predicate; - } - - bool operator!=(const ExprPredicate& b) const { - return !operator==(b); - } - - u32 predicate; -}; - -class ExprCondCode final { -public: - explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} - - bool operator==(const ExprCondCode& b) const { - return cc == b.cc; - } - - bool operator!=(const ExprCondCode& b) const { - return !operator==(b); - } - - ConditionCode cc; -}; - -class ExprBoolean final { -public: - explicit ExprBoolean(bool val) : value{val} {} - - bool operator==(const ExprBoolean& b) const { - return value == b.value; - } - - bool operator!=(const ExprBoolean& b) const { - return !operator==(b); - } - - bool value; -}; - -class ExprGprEqual final { -public: - explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} - - bool operator==(const ExprGprEqual& b) const { - return gpr == b.gpr && value == b.value; - } - - bool operator!=(const ExprGprEqual& b) const { - return !operator==(b); - } - - u32 gpr; - u32 value; -}; - -template -Expr MakeExpr(Args&&... 
args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second); - -bool ExprAreOpposite(const Expr& first, const Expr& second); - -Expr MakeExprNot(Expr first); - -Expr MakeExprAnd(Expr first, Expr second); - -Expr MakeExprOr(Expr first, Expr second); - -bool ExprIsTrue(const Expr& first); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include - -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& shader_config{maxwell3d.regs.shader_config[static_cast(program)]}; - return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; -} - -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. 
- static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; - static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; - - const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - std::size_t offset = start_offset; - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & MASK) == SELF_JUMPING_BRANCH) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - ++offset; - } - // The last instruction is included in the program size - return std::min(offset + 1, program.size()); -} - -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute) { - ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); - memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); - code.resize(CalculateProgramSize(code, is_compute)); - return code; -} - -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b) { - size_t unique_identifier = boost::hash_value(code); - if (is_a) { - // VertexA programs include two programs - boost::hash_combine(unique_identifier, boost::hash_value(code_b)); - } - return static_cast(unique_identifier); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon::Shader { - -using ProgramCode = std::vector; - -constexpr u32 STAGE_MAIN_OFFSET = 10; -constexpr u32 KERNEL_MAIN_OFFSET = 0; - -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); - -/// Gets if the current instruction offset is a scheduler instruction -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute); - -/// Hashes one (or two) program streams -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b = {}); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float - FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - FSwizzleAdd, /// (float a, float b, uint mask) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, 
int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - IBitMSB, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - UBitMSB, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> 
f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAnd2, /// (bool2 a) -> bool - - LogicalFOrdLessThan, /// (float a, float b) -> bool - LogicalFOrdEqual, /// (float a, float b) -> bool - LogicalFOrdLessEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterThan, /// (float a, float b) -> bool - LogicalFOrdNotEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterEqual, /// (float a, float b) -> bool - LogicalFOrdered, /// (float a, float b) -> bool - LogicalFUnordered, /// (float a, float b) -> bool - LogicalFUnordLessThan, /// (float a, float b) -> bool - LogicalFUnordEqual, /// (float a, float b) -> bool - LogicalFUnordLessEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterThan, /// (float a, float b) -> bool - LogicalFUnordNotEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterEqual, /// (float a, float b) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// 
(uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - LogicalAddCarry, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 - - ImageLoad, /// (MetaImage, int[N] coords) -> void - ImageStore, /// (MetaImage, int[N] coords) -> void - - AtomicImageAdd, /// (MetaImage, int[N] coords) -> void - AtomicImageAnd, /// (MetaImage, int[N] coords) -> void - AtomicImageOr, /// (MetaImage, int[N] coords) -> void - AtomicImageXor, /// (MetaImage, int[N] 
coords) -> void - AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - - AtomicUExchange, /// (memory, uint) -> uint - AtomicUAdd, /// (memory, uint) -> uint - AtomicUMin, /// (memory, uint) -> uint - AtomicUMax, /// (memory, uint) -> uint - AtomicUAnd, /// (memory, uint) -> uint - AtomicUOr, /// (memory, uint) -> uint - AtomicUXor, /// (memory, uint) -> uint - - AtomicIExchange, /// (memory, int) -> int - AtomicIAdd, /// (memory, int) -> int - AtomicIMin, /// (memory, int) -> int - AtomicIMax, /// (memory, int) -> int - AtomicIAnd, /// (memory, int) -> int - AtomicIOr, /// (memory, int) -> int - AtomicIXor, /// (memory, int) -> int - - ReduceUAdd, /// (memory, uint) -> void - ReduceUMin, /// (memory, uint) -> void - ReduceUMax, /// (memory, uint) -> void - ReduceUAnd, /// (memory, uint) -> void - ReduceUOr, /// (memory, uint) -> void - ReduceUXor, /// (memory, uint) -> void - - ReduceIAdd, /// (memory, int) -> void - ReduceIMin, /// (memory, int) -> void - ReduceIMax, /// (memory, int) -> void - ReduceIAnd, /// (memory, int) -> void - ReduceIOr, /// (memory, int) -> void - ReduceIXor, /// (memory, int) -> void - - Branch, /// (uint branch_target) -> void - BranchIndirect, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - InvocationId, /// () -> int - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - BallotThread, /// (bool) -> uint - VoteAll, /// (bool) -> bool - VoteAny, /// (bool) -> bool - VoteEqual, /// (bool) -> bool - - ThreadId, /// () -> uint - ThreadEqMask, /// () -> uint - ThreadGeMask, /// () -> uint - ThreadGtMask, /// () -> uint - ThreadLeMask, /// () -> uint - ThreadLtMask, /// () -> 
uint - ShuffleIndexed, /// (uint value, uint index) -> uint - - Barrier, /// () -> void - MemoryBarrierGroup, /// () -> void - MemoryBarrierGlobal, /// () -> void - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - -enum class MetaStackClass { - Ssy, - Pbk, -}; - -class OperationNode; -class ConditionalNode; -class GprNode; -class CustomVarNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; -class CbufNode; -class LmemNode; -class PatchNode; -class SmemNode; -class GmemNode; -class CommentNode; - -using NodeData = std::variant; -using Node = std::shared_ptr; -using Node4 = std::array; -using NodeBlock = std::vector; - -struct ArraySamplerNode; -struct BindlessSamplerNode; -struct SeparateSamplerNode; - -using TrackSamplerData = std::variant; -using TrackSampler = std::shared_ptr; - -struct SamplerEntry { - /// Bound samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, - bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, - is_buffer{is_buffer_}, is_indexed{is_indexed_} {} - - /// Separate sampler constructor - explicit SamplerEntry(u32 index_, std::pair offsets, std::pair buffers, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_) - : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} - - /// Bindless samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, - 
is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { - } - - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 secondary_offset = 0; ///< Secondary offset in the const buffer. - u32 buffer = 0; ///< Buffer where the bindless sampler is read. - u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. - u32 size = 1; ///< Size of the sampler. - - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. - bool is_separated = false; ///< Whether the image and sampler is separated or not. 
-}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct ArraySamplerNode { - u32 index; - u32 base_offset; - u32 bindless_var; -}; - -/// Represents a tracked separate sampler image pair that was folded statically -struct SeparateSamplerNode { - std::pair indices; - std::pair offsets; -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct BindlessSamplerNode { - u32 index; - u32 offset; -}; - -struct ImageEntry { -public: - /// Bound images constructor - explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, type{type_} {} - - /// Bindless samplers constructor - explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} - - void MarkWrite() { - is_written = true; - } - - void MarkRead() { - is_read = true; - } - - void MarkAtomic() { - MarkWrite(); - MarkRead(); - is_atomic = true; - } - - u32 index = 0; - u32 offset = 0; - u32 buffer = 0; - - Tegra::Shader::ImageType type{}; - bool is_bindless = false; - bool is_written = false; - bool is_read = false; - bool is_atomic = false; -}; - -struct GlobalMemoryBase { - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - - [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - -/// Parameters describing an arithmetic operation -struct MetaArithmetic { - bool precise{}; ///< Whether the operation can be constraint or not -}; - -/// Parameters describing a texture sampler -struct MetaTexture { - SamplerEntry sampler; - Node array; - Node depth_compare; - std::vector aoffi; - std::vector ptp; - std::vector derivates; - Node bias; - Node lod; - Node component; - u32 element{}; - Node index; -}; - -struct MetaImage { - const ImageEntry& image; - std::vector values; - u32 element{}; -}; - -/// 
Parameters that modify an operation but are not part of any particular operand -using Meta = - std::variant; - -class AmendNode { -public: - [[nodiscard]] std::optional GetAmendIndex() const { - if (amend_index == amend_null_index) { - return std::nullopt; - } - return {amend_index}; - } - - void SetAmendIndex(std::size_t index) { - amend_index = index; - } - - void ClearAmend() { - amend_index = amend_null_index; - } - -private: - static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; - std::size_t amend_index{amend_null_index}; -}; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final : public AmendNode { -public: - explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} - - explicit OperationNode(OperationCode code_, Meta meta_) - : OperationNode(code_, std::move(meta_), std::vector{}) {} - - explicit OperationNode(OperationCode code_, std::vector operands_) - : OperationNode(code_, Meta{}, std::move(operands_)) {} - - explicit OperationNode(OperationCode code_, Meta meta_, std::vector operands_) - : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} - - template - explicit OperationNode(OperationCode code_, Meta meta_, Args&&... 
operands_) - : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} - - [[nodiscard]] OperationCode GetCode() const { - return code; - } - - [[nodiscard]] const Meta& GetMeta() const { - return meta; - } - - [[nodiscard]] std::size_t GetOperandsCount() const { - return operands.size(); - } - - [[nodiscard]] const Node& operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - OperationCode code{}; - Meta meta{}; - std::vector operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final : public AmendNode { -public: - explicit ConditionalNode(Node condition_, std::vector&& code_) - : condition{std::move(condition_)}, code{std::move(code_)} {} - - [[nodiscard]] const Node& GetCondition() const { - return condition; - } - - [[nodiscard]] const std::vector& GetCode() const { - return code; - } - -private: - Node condition; ///< Condition to be satisfied - std::vector code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return static_cast(index); - } - -private: - Tegra::Shader::Register index{}; -}; - -/// A custom variable -class CustomVarNode final { -public: - explicit constexpr CustomVarNode(u32 index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value_) : value{value_} {} - - [[nodiscard]] constexpr u32 GetValue() const { - return value; - } - -private: - u32 value{}; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} - - [[nodiscard]] constexpr 
InternalFlag GetFlag() const { - return flag; - } - -private: - InternalFlag flag{}; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) - : index{index_}, negated{negated_} {} - - [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { - return index; - } - - [[nodiscard]] constexpr bool IsNegated() const { - return negated; - } - -private: - Tegra::Shader::Pred index{}; - bool negated{}; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) - : buffer{std::move(buffer_)}, index{index_}, element{element_} {} - - // Initialize for physical attributes (index is a variable value). - explicit AbufNode(Node physical_address_, Node buffer_ = {}) - : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} - - [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - [[nodiscard]] u32 GetElement() const { - return element; - } - - [[nodiscard]] const Node& GetBuffer() const { - return buffer; - } - - [[nodiscard]] bool IsPhysicalBuffer() const { - return static_cast(physical_address); - } - - [[nodiscard]] const Node& GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address; - Node buffer; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Patch memory (used to communicate tessellation stages). 
-class PatchNode final { -public: - explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} - - [[nodiscard]] constexpr u32 GetOffset() const { - return offset; - } - -private: - u32 offset{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} - - [[nodiscard]] u32 GetIndex() const { - return index; - } - - [[nodiscard]] const Node& GetOffset() const { - return offset; - } - -private: - u32 index{}; - Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit LmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Shared memory node -class SmemNode final { -public: - explicit SmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) - : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, - descriptor{descriptor_} {} - - [[nodiscard]] const Node& GetRealAddress() const { - return real_address; - } - - [[nodiscard]] const Node& GetBaseAddress() const { - return base_address; - } - - [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - Node real_address; - Node base_address; - GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text_) : text{std::move(text_)} {} - - [[nodiscard]] const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp 
b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -Node Conditional(Node condition, std::vector code) { - return MakeNode(std::move(condition), std::move(code)); -} - -Node Comment(std::string text) { - return MakeNode(std::move(text)); -} - -Node Immediate(u32 value) { - return MakeNode(value); -} - -Node Immediate(s32 value) { - return Immediate(static_cast(value)); -} - -Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); -} - -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return 
OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldExtract: - return OperationCode::UBitfieldExtract; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case OperationCode::AtomicIExchange: - return OperationCode::AtomicUExchange; - case OperationCode::AtomicIAdd: - return OperationCode::AtomicUAdd; - case OperationCode::AtomicIMin: - return OperationCode::AtomicUMin; - case OperationCode::AtomicIMax: - return OperationCode::AtomicUMax; - case OperationCode::AtomicIAnd: - return OperationCode::AtomicUAnd; - case OperationCode::AtomicIOr: - return OperationCode::AtomicUOr; - case OperationCode::AtomicIXor: - return OperationCode::AtomicUXor; - case OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); - return {}; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h deleted file mode 100644 index 1e0886185..000000000 --- a/src/video_core/shader/node_helper.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2019 yuzu 
Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -/// This arithmetic operation cannot be constraint -inline constexpr MetaArithmetic PRECISE = {true}; -/// This arithmetic operation can be optimized away -inline constexpr MetaArithmetic NO_PRECISE = {false}; - -/// Creates a conditional node -Node Conditional(Node condition, std::vector code); - -/// Creates a commentary node -Node Comment(std::string text); - -/// Creates an u32 immediate -Node Immediate(u32 value); - -/// Creates a s32 immediate -Node Immediate(s32 value); - -/// Creates a f32 immediate -Node Immediate(f32 value); - -/// Converts an signed operation code to an unsigned operation code -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); - -template -Node MakeNode(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -template -TrackSampler MakeTrackSampler(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T{std::forward(args)...}); -} - -template -Node Operation(OperationCode code, Args&&... args) { - if constexpr (sizeof...(args) == 0) { - return MakeNode(code); - } else if constexpr (std::is_convertible_v>, - Meta>) { - return MakeNode(code, std::forward(args)...); - } else { - return MakeNode(code, Meta{}, std::forward(args)...); - } -} - -template -Node SignedOperation(OperationCode code, bool is_signed, Args&&... 
args) { - return Operation(SignedToUnsignedCode(code, is_signed), std::forward(args)...); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp deleted file mode 100644 index 148d91fcb..000000000 --- a/src/video_core/shader/registry.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::ConstBufferEngineInterface; -using Tegra::Engines::SamplerDescriptor; -using Tegra::Engines::ShaderType; - -namespace { - -GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage == ShaderType::Compute) { - return {}; - } - - auto& graphics = dynamic_cast(engine); - - return { - .tfb_layouts = graphics.regs.tfb_layouts, - .tfb_varying_locs = graphics.regs.tfb_varying_locs, - .primitive_topology = graphics.regs.draw.topology, - .tessellation_primitive = graphics.regs.tess_mode.prim, - .tessellation_spacing = graphics.regs.tess_mode.spacing, - .tfb_enabled = graphics.regs.tfb_enabled != 0, - .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, - }; -} - -ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage != ShaderType::Compute) { - return {}; - } - - auto& compute = dynamic_cast(engine); - const auto& launch = compute.launch_description; - - return { - .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, - .shared_memory_size_in_words = launch.shared_alloc, - .local_memory_size_in_words = launch.local_pos_alloc, - }; -} - -} // 
Anonymous namespace - -Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) - : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, - bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} - -Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) - : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( - shader_stage, engine_)} {} - -Registry::~Registry() = default; - -std::optional Registry::ObtainKey(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional Registry::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional Registry::ObtainSeparateSampler( - std::pair buffers, std::pair offsets) { - SeparateSamplerKey key; - key.buffers = buffers; - key.offsets = offsets; - const auto iter = separate_samplers.find(key); - if (iter != separate_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - - const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); - const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); - const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); - separate_samplers.emplace(key, value); - return value; -} - 
-std::optional Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -bool Registry::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool Registry::HasEqualKeys(const Registry& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -const GraphicsInfo& Registry::GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; -} - -const ComputeInfo& Registry::GetComputeInfo() const { - ASSERT(stage 
== Tegra::Engines::ShaderType::Compute); - return compute_info; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -struct SeparateSamplerKey { - std::pair buffers; - std::pair offsets; -}; - -} // namespace VideoCommon::Shader - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { - return std::hash{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ - key.offsets.second); - } -}; - -template <> -struct equal_to { - bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, - const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { - return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; - } -}; - -} // namespace std - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map; -using SeparateSamplerMap = - std::unordered_map; -using BindlessSamplerMap = - std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -struct GraphicsInfo { - using Maxwell = Tegra::Engines::Maxwell3D::Regs; - - std::array - tfb_layouts{}; - std::array, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; - Maxwell::PrimitiveTopology primitive_topology{}; - Maxwell::TessellationPrimitive 
tessellation_primitive{}; - Maxwell::TessellationSpacing tessellation_spacing{}; - bool tfb_enabled = false; - bool tessellation_clockwise = false; -}; -static_assert(std::is_trivially_copyable_v && - std::is_standard_layout_v); - -struct ComputeInfo { - std::array workgroup_size{}; - u32 shared_memory_size_in_words = 0; - u32 local_memory_size_in_words = 0; -}; -static_assert(std::is_trivially_copyable_v && std::is_standard_layout_v); - -struct SerializedRegistryInfo { - VideoCore::GuestDriverProfile guest_driver_profile; - u32 bound_buffer = 0; - GraphicsInfo graphics; - ComputeInfo compute; -}; - -/** - * The Registry is a class use to interface the 3D and compute engines with the shader compiler. - * With it, the shader can obtain required data from GPU state and store it for disk shader - * compilation. - */ -class Registry { -public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); - - explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine_); - - ~Registry(); - - /// Retrieves a key from the registry, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. - std::optional ObtainKey(u32 buffer, u32 offset); - - std::optional ObtainBoundSampler(u32 offset); - - std::optional ObtainSeparateSampler( - std::pair buffers, std::pair offsets); - - std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Checks keys and samplers against engine's current const buffers. - /// Returns true if they are the same value, false otherwise. 
- bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the registry. - bool HasEqualKeys(const Registry& rhs) const; - - /// Returns graphics information from this shader - const GraphicsInfo& GetGraphicsInfo() const; - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - SeparateSamplerMap separate_samplers; - BindlessSamplerMap bindless_samplers; - u32 bound_buffer; - GraphicsInfo graphics_info; - ComputeInfo compute_info; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Attribute; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaMode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredCondition; -using Tegra::Shader::PredOperation; -using Tegra::Shader::Register; - -ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, - Registry& registry_) - : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ - registry_} { - Decode(); - PostDecode(); -} - -ShaderIR::~ShaderIR() = default; - -Node ShaderIR::GetRegister(Register reg) { - if (reg != Register::ZeroIndex) { - used_registers.insert(static_cast(reg)); - } - return MakeNode(reg); -} - -Node ShaderIR::GetCustomVariable(u32 id) { - return MakeNode(id); -} - -Node ShaderIR::GetImmediate19(Instruction instr) { - return Immediate(instr.alu.GetImm20_19()); -} - -Node ShaderIR::GetImmediate32(Instruction instr) { - return Immediate(instr.alu.GetImm20_32()); -} - -Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { - const auto index = static_cast(index_); - const auto offset = static_cast(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); - - return MakeNode(index, Immediate(offset)); -} - -Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { - const auto index = static_cast(index_); - const auto offset = static_cast(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); - - Node final_offset = [&] { - // Attempt to inline constant buffer without a variable offset. This is done to allow - // tracking LDC calls. 
- if (const auto gpr = std::get_if(&*node)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - return Immediate(offset); - } - } - return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); - }(); - return MakeNode(index, std::move(final_offset)); -} - -Node ShaderIR::GetPredicate(u64 pred_, bool negated) { - const auto pred = static_cast(pred_); - if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { - used_predicates.insert(pred); - } - - return MakeNode(pred, negated); -} - -Node ShaderIR::GetPredicate(bool immediate) { - return GetPredicate(static_cast(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); -} - -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_input_attributes.emplace(index); - return MakeNode(index, static_cast(element), std::move(buffer)); -} - -Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { - uses_physical_attributes = true; - return MakeNode(GetRegister(physical_address), buffer); -} - -Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_output_attributes.insert(index); - return MakeNode(index, static_cast(element), std::move(buffer)); -} - -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - Node node = MakeNode(flag); - if (negated) { - return Operation(OperationCode::LogicalNegate, std::move(node)); - } - return node; -} - -Node ShaderIR::GetLocalMemory(Node address) { - return MakeNode(std::move(address)); -} - -Node ShaderIR::GetSharedMemory(Node address) { - return MakeNode(std::move(address)); -} - -Node ShaderIR::GetTemporary(u32 id) { - return GetRegister(Register::ZeroIndex + 1 + id); -} - -Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); - } 
- if (negate) { - value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { - switch (size) { - case Register::Size::Byte: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - return value; - case Register::Size::Short: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - return value; - case Register::Size::Word: - // Default - do nothing - return value; - default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", size); - return value; - } -} - -Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { - if (!is_signed) { - // Absolute or negate on an unsigned is pointless - return value; - } - if (absolute) { - value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { - Node value = Immediate(instr.half_imm.PackImmediates()); - if (!has_negation) { - return value; - } - - Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); - Node second_negate = 
GetPredicate(instr.half_imm.second_negate != 0); - - return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), - std::move(second_negate)); -} - -Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { - return Operation(OperationCode::HUnpack, type, std::move(value)); -} - -Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { - switch (merge) { - case Tegra::Shader::HalfMerge::H0_H1: - return src; - case Tegra::Shader::HalfMerge::F32: - return Operation(OperationCode::HMergeF32, std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H0: - return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H1: - return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); - } - UNREACHABLE(); - return src; -} - -Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), - GetPredicate(true)); - } - return value; -} - -Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - if (condition == PredCondition::T) { - return GetPredicate(true); - } else if (condition == PredCondition::F) { - return GetPredicate(false); - } - - static constexpr std::array comparison_table{ - OperationCode(0), - OperationCode::LogicalFOrdLessThan, // LT - OperationCode::LogicalFOrdEqual, // EQ - OperationCode::LogicalFOrdLessEqual, // LE - 
OperationCode::LogicalFOrdGreaterThan, // GT - OperationCode::LogicalFOrdNotEqual, // NE - OperationCode::LogicalFOrdGreaterEqual, // GE - OperationCode::LogicalFOrdered, // NUM - OperationCode::LogicalFUnordered, // NAN - OperationCode::LogicalFUnordLessThan, // LTU - OperationCode::LogicalFUnordEqual, // EQU - OperationCode::LogicalFUnordLessEqual, // LEU - OperationCode::LogicalFUnordGreaterThan, // GTU - OperationCode::LogicalFUnordNotEqual, // NEU - OperationCode::LogicalFUnordGreaterEqual, // GEU - }; - const std::size_t index = static_cast(condition); - ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - - return Operation(comparison_table[index], op_a, op_b); -} - -Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, - std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); -} - -Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, - std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LE, 
OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); -} - -OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static constexpr std::array operation_table{ - OperationCode::LogicalAnd, - OperationCode::LogicalOr, - OperationCode::LogicalXor, - }; - - const auto index = static_cast(operation); - if (index >= operation_table.size()) { - UNIMPLEMENTED_MSG("Unknown predicate operation."); - return {}; - } - - return operation_table[index]; -} - -Node ShaderIR::GetConditionCode(ConditionCode cc) const { - switch (cc) { - case ConditionCode::NEU: - return GetInternalFlag(InternalFlag::Zero, true); - case ConditionCode::FCSM_TR: - UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); - return MakeNode(Pred::NeverExecute, false); - default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); - return MakeNode(Pred::NeverExecute, false); - } -} - -void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { - bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); -} - -void 
ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); -} - -void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); -} - -void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { - SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); -} - -void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), - Immediate(offset), Immediate(bits)); -} - -Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), - Immediate(bits)); -} - -void ShaderIR::MarkAttributeUsage(Attribute::Index index, 
u64 element) { - switch (index) { - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - break; - case 1: - uses_layer = true; - break; - case 2: - uses_viewport_index = true; - break; - case 3: - uses_point_size = true; - break; - } - break; - case Attribute::Index::TessCoordInstanceIDVertexID: - switch (element) { - case 2: - uses_instance_id = true; - break; - case 3: - uses_vertex_id = true; - break; - } - break; - case Attribute::Index::ClipDistances0123: - case Attribute::Index::ClipDistances4567: { - const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; - used_clip_distances.at(clip_index) = true; - break; - } - case Attribute::Index::FrontColor: - case Attribute::Index::FrontSecondaryColor: - case Attribute::Index::BackColor: - case Attribute::Index::BackSecondaryColor: - uses_legacy_varyings = true; - break; - default: - if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { - uses_legacy_varyings = true; - } - break; - } -} - -std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const auto id = amend_code.size(); - amend_code.push_back(std::move(new_amend)); - return id; -} - -u32 ShaderIR::NewCustomVariable() { - return num_custom_variables++; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct ShaderBlock; - -constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; - -struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) - : max_offset{max_offset_}, is_indirect{is_indirect_} {} - - constexpr ConstBuffer() = default; - - void MarkAsUsed(u64 offset) { - max_offset = std::max(max_offset, static_cast(offset)); - } - - void MarkAsUsedIndirect() { - is_indirect = true; - } - - bool IsIndirect() const { - return is_indirect; - } - - u32 GetSize() const { - return max_offset + static_cast(sizeof(float)); - } - - u32 GetMaxOffset() const { - return max_offset; - } - -private: - u32 max_offset = 0; - bool is_indirect = false; -}; - -struct GlobalMemoryUsage { - bool is_read{}; - bool is_written{}; -}; - -class ShaderIR final { -public: - explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, - CompilerSettings settings_, Registry& registry_); - ~ShaderIR(); - - const std::map& GetBasicBlocks() const { - return basic_blocks; - } - - const std::set& GetRegisters() const { - return used_registers; - } - - const std::set& GetPredicates() const { - return used_predicates; - } - - const std::set& GetInputAttributes() const { - return used_input_attributes; - } - - const std::set& GetOutputAttributes() const { - return used_output_attributes; - } - - const std::map& GetConstantBuffers() const { - return used_cbufs; - } - - const std::list& GetSamplers() const { - return used_samplers; - } - - const std::list& GetImages() const 
{ - return used_images; - } - - const std::array& GetClipDistances() - const { - return used_clip_distances; - } - - const std::map& GetGlobalMemory() const { - return used_global_memory; - } - - std::size_t GetLength() const { - return static_cast(coverage_end * sizeof(u64)); - } - - bool UsesLayer() const { - return uses_layer; - } - - bool UsesViewportIndex() const { - return uses_viewport_index; - } - - bool UsesPointSize() const { - return uses_point_size; - } - - bool UsesInstanceId() const { - return uses_instance_id; - } - - bool UsesVertexId() const { - return uses_vertex_id; - } - - bool UsesLegacyVaryings() const { - return uses_legacy_varyings; - } - - bool UsesYNegate() const { - return uses_y_negate; - } - - bool UsesWarps() const { - return uses_warps; - } - - bool HasPhysicalAttributes() const { - return uses_physical_attributes; - } - - const Tegra::Shader::Header& GetHeader() const { - return header; - } - - bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - u32 GetASTNumVariables() const { - return program_manager.GetVariables(); - } - - u32 ConvertAddressToNvidiaSpace(u32 address) const { - return (address - main_offset) * static_cast(sizeof(Tegra::Shader::Instruction)); - } - - /// Returns a condition code evaluated from internal flags - Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; - - const Node& GetAmendNode(std::size_t index) const { - return amend_code[index]; - } - - u32 GetNumCustomVariables() const { - return num_custom_variables; - } - -private: - friend class ASTDecoder; - - struct SamplerInfo { - std::optional type; - std::optional is_array; - std::optional is_shadow; - std::optional is_buffer; - - constexpr bool IsComplete() const noexcept { - return type && is_array && is_shadow && 
is_buffer; - } - }; - - void Decode(); - void PostDecode(); - - NodeBlock DecodeRange(u32 begin, u32 end); - void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); - void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); - - /** - * Decodes a single instruction from Tegra to IR. - * @param bb Basic block where the nodes will be written to. - * @param pc Program counter. Offset to decode. - * @return Next address to decode. - */ - u32 DecodeInstr(NodeBlock& bb, u32 pc); - - u32 DecodeArithmetic(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); - u32 DecodeBfe(NodeBlock& bb, u32 pc); - u32 DecodeBfi(NodeBlock& bb, u32 pc); - u32 DecodeShift(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); - u32 DecodeFfma(NodeBlock& bb, u32 pc); - u32 DecodeHfma2(NodeBlock& bb, u32 pc); - u32 DecodeConversion(NodeBlock& bb, u32 pc); - u32 DecodeWarp(NodeBlock& bb, u32 pc); - u32 DecodeMemory(NodeBlock& bb, u32 pc); - u32 DecodeTexture(NodeBlock& bb, u32 pc); - u32 DecodeImage(NodeBlock& bb, u32 pc); - u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeFloatSet(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); - u32 DecodeHalfSet(NodeBlock& bb, u32 pc); - u32 DecodeVideo(NodeBlock& bb, u32 pc); - u32 DecodeXmad(NodeBlock& bb, u32 pc); - u32 DecodeOther(NodeBlock& bb, u32 pc); - - /// Generates a node for a passed register. 
- Node GetRegister(Tegra::Shader::Register reg); - /// Generates a node for a custom variable - Node GetCustomVariable(u32 id); - /// Generates a node representing a 19-bit immediate value - Node GetImmediate19(Tegra::Shader::Instruction instr); - /// Generates a node representing a 32-bit immediate value - Node GetImmediate32(Tegra::Shader::Instruction instr); - /// Generates a node representing a constant buffer - Node GetConstBuffer(u64 index, u64 offset); - /// Generates a node representing a constant buffer with a variadic offset - Node GetConstBufferIndirect(u64 index, u64 offset, Node node); - /// Generates a node for a passed predicate. It can be optionally negated - Node GetPredicate(u64 pred, bool negated = false); - /// Generates a predicate node for an immediate true or false value - Node GetPredicate(bool immediate); - /// Generates a node representing an input attribute. Keeps track of used attributes. - Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); - /// Generates a node representing a physical input attribute. - Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); - /// Generates a node representing an output attribute. Keeps track of used attributes. - Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); - /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false) const; - /// Generates a node representing a local memory address - Node GetLocalMemory(Node address); - /// Generates a node representing a shared memory address - Node GetSharedMemory(Node address); - /// Generates a temporary, internally it uses a post-RZ register - Node GetTemporary(u32 id); - - /// Sets a register. src value must be a number-evaluated node. - void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); - /// Sets a predicate. 
src value must be a bool-evaluated node - void SetPredicate(NodeBlock& bb, u64 dest, Node src); - /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); - /// Sets a local memory address with a value. - void SetLocalMemory(NodeBlock& bb, Node address, Node value); - /// Sets a shared memory address with a value. - void SetSharedMemory(NodeBlock& bb, Node address, Node value); - /// Sets a temporary. Internally it uses a post-RZ register - void SetTemporary(NodeBlock& bb, u32 id, Node value); - - /// Sets internal flags from a float - void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); - /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); - - /// Conditionally absolute/negated float. Absolute is applied first - Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); - /// Conditionally saturates a float - Node GetSaturatedFloat(Node value, bool saturate = true); - - /// Converts an integer to different sizes. - Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); - /// Conditionally absolute/negated integer. Absolute is applied first - Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); - - /// Unpacks a half immediate from an instruction - Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); - /// Unpacks a binary value into a half float pair with a type format - Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); - /// Merges a half pair into another value - Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); - /// Conditionally absolute/negated half float pair. 
Absolute is applied first - Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); - /// Conditionally saturates a half float pair - Node GetSaturatedHalfFloat(Node value, bool saturate = true); - - /// Get image component value by type and size - std::pair GetComponentValue(Tegra::Texture::ComponentType component_type, - u32 component_size, Node original_value); - - /// Returns a predicate comparing two floats - Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - /// Returns a predicate comparing two integers - Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, - Node op_a, Node op_b); - /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - - /// Returns a predicate combiner operation - OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - - /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, - std::optional sampler); - - /// Accesses a texture sampler. - std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); - - /// Accesses a texture sampler for a bindless texture. - std::optional GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); - - /// Accesses an image. - ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); - - /// Access a bindless image sampler. - ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - - /// Extracts a sequence of bits from a node - Node BitfieldExtract(Node value, u32 offset, u32 bits); - - /// Inserts a sequence of bits from a node - Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); - - /// Marks the usage of a input or output attribute. 
- void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); - - /// Decodes VMNMX instruction and inserts its code into the passed basic block. - void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); - - void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); - - void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - - Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi, - std::optional bindless_reg); - - Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); - - Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, - bool is_bindless); - - Node4 GetTldCode(Tegra::Shader::Instruction instr); - - Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool is_array); - - std::tuple ValidateAndGetCoordinateElement( - Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, - bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); - - std::vector GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - std::vector GetPtpCoordinates(std::array ptp_regs); - - Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, - std::optional 
bindless_reg); - - Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, - u64 byte_height); - - void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, - Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, - Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate, bool sets_cc); - void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, - Node op_c, Node imm_lut, bool sets_cc); - - std::tuple TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); - - std::pair HandleBindlessIndirectRead(const CbufNode& cbuf, - const OperationNode& operation, - Node gpr, Node base_offset, - Node tracked, const NodeBlock& code, - s64 cursor); - - std::optional TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const; - - std::tuple TrackGlobalMemory(NodeBlock& bb, - Tegra::Shader::Instruction instr, - bool is_read, bool is_write); - - /// Register new amending code and obtain the reference id. 
- std::size_t DeclareAmend(Node new_amend); - - u32 NewCustomVariable(); - - const ProgramCode& program_code; - const u32 main_offset; - const CompilerSettings settings; - Registry& registry; - - bool decompiled{}; - bool disable_flow_stack{}; - - u32 coverage_begin{}; - u32 coverage_end{}; - - std::map basic_blocks; - NodeBlock global_code; - ASTManager program_manager{true, true}; - std::vector amend_code; - u32 num_custom_variables{}; - - std::set used_registers; - std::set used_predicates; - std::set used_input_attributes; - std::set used_output_attributes; - std::map used_cbufs; - std::list used_samplers; - std::list used_images; - std::array used_clip_distances{}; - std::map used_global_memory; - bool uses_layer{}; - bool uses_viewport_index{}; - bool uses_point_size{}; - bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes - bool uses_instance_id{}; - bool uses_vertex_id{}; - bool uses_legacy_varyings{}; - bool uses_y_negate{}; - bool uses_warps{}; - bool uses_indexed_samplers{}; - - Tegra::Shader::Header header; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -std::pair FindOperation(const NodeBlock& code, s64 cursor, - OperationCode operation_code) { - for (; cursor >= 0; --cursor) { - Node node = code.at(cursor); - - if (const auto operation = std::get_if(&*node)) { - if (operation->GetCode() == operation_code) { - return {std::move(node), cursor}; - } - } - - if (const auto conditional = std::get_if(&*node)) { - const auto& conditional_code = conditional->GetCode(); - auto result = FindOperation( - conditional_code, static_cast(conditional_code.size() - 1), operation_code); - auto& found = result.first; - if (found) { - return {std::move(found), cursor}; - } - } - } - return {}; -} - -std::optional> DecoupleIndirectRead(const OperationNode& operation) { - if (operation.GetCode() != OperationCode::UAdd) { - return std::nullopt; - } - Node gpr; - Node offset; - ASSERT(operation.GetOperandsCount() == 2); - for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { - Node operand = operation[i]; - if (std::holds_alternative(*operand)) { - offset = operation[i]; - } else if (std::holds_alternative(*operand)) { - gpr = operation[i]; - } - } - if (offset && gpr) { - return std::make_pair(gpr, offset); - } - return std::nullopt; -} - -bool AmendNodeCv(std::size_t amend_index, Node node) { - if (const auto operation = std::get_if(&*node)) { - operation->SetAmendIndex(amend_index); - return true; - } - if (const auto conditional = std::get_if(&*node)) { - conditional->SetAmendIndex(amend_index); - return true; - } - return false; -} - -} // Anonymous namespace - -std::pair ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { - if (const auto cbuf = std::get_if(&*tracked)) { - const u32 cbuf_index = cbuf->GetIndex(); - - // Constant buffer found, test 
if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if(&*offset)) { - auto track = MakeTrackSampler(cbuf_index, immediate->GetValue()); - return {tracked, track}; - } - if (const auto operation = std::get_if(&*offset)) { - const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf_index) { - return {}; - } - if (const std::optional pair = DecoupleIndirectRead(*operation)) { - auto [gpr, base_offset] = *pair; - return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, - code, cursor); - } - } - return {}; - } - if (const auto gpr = std::get_if(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackBindlessSampler(source, code, new_cursor); - } - if (const auto operation = std::get_if(&*tracked)) { - const OperationNode& op = *operation; - - const OperationCode opcode = operation->GetCode(); - if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { - ASSERT(op.GetOperandsCount() == 2); - auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); - auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); - if (node_a && node_b) { - auto track = MakeTrackSampler(std::pair{index_a, index_b}, - std::pair{offset_a, offset_b}); - return {tracked, std::move(track)}; - } - } - std::size_t i = op.GetOperandsCount(); - while (i--) { - if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { - // Constant buffer found in operand. 
- return found; - } - } - return {}; - } - if (const auto conditional = std::get_if(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackBindlessSampler(tracked, conditional_code, - static_cast(conditional_code.size())); - } - return {}; -} - -std::pair ShaderIR::HandleBindlessIndirectRead( - const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, - const NodeBlock& code, s64 cursor) { - const auto offset_imm = std::get(*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); - Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); - - Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - - // TODO: Implement bindless index custom variable - auto track = - MakeTrackSampler(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); - return {tracked, track}; -} - -std::tuple ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, - s64 cursor) const { - if (const auto cbuf = std::get_if(&*tracked)) { - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if(&*offset)) { - return {tracked, cbuf->GetIndex(), immediate->GetValue()}; - } - return {}; - } - if (const auto gpr = std::get_if(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackCbuf(source, code, new_cursor); - } - if (const 
auto operation = std::get_if(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { - // Cbuf found in operand. - return found; - } - } - return {}; - } - if (const auto conditional = std::get_if(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackCbuf(tracked, conditional_code, static_cast(conditional_code.size())); - } - return {}; -} - -std::optional ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register - // that it uses as operand - const auto result = TrackRegister(&std::get(*tracked), code, cursor - 1); - const auto& found = result.first; - if (!found) { - return std::nullopt; - } - if (const auto immediate = std::get_if(&*found)) { - return immediate->GetValue(); - } - return std::nullopt; -} - -std::pair ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const { - for (; cursor >= 0; --cursor) { - const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); - if (!found_node) { - return {}; - } - const auto operation = std::get_if(&*found_node); - ASSERT(operation); - - const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if(&*target)) { - if (gpr_target->GetIndex() == tracked->GetIndex()) { - return {(*operation)[1], new_cursor}; - } - } - } - return {}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/transform_feedback.h" - -namespace VideoCommon::Shader { - -namespace { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 - -/// Attribute offsets that describe a vector -constexpr std::array VECTORS = { - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] -}; -} // namespace - -std::unordered_map BuildTransformFeedback(const GraphicsInfo& info) { - - std::unordered_map tfb; - - for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = info.tfb_varying_locs[buffer]; - const auto& layout = info.tfb_layouts[buffer]; - const std::size_t varying_count = layout.varying_count; - - std::size_t highest = 0; - - for (std::size_t offset = 0; offset < varying_count; ++offset) { - const std::size_t base_offset = offset; - const u8 
location = locations[offset]; - - VaryingTFB varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * sizeof(u32); - varying.components = 1; - - if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - - [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; - UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); - - highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); - } - - UNIMPLEMENTED_IF(highest != layout.stride); - } - return tfb; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct VaryingTFB { - std::size_t buffer; - std::size_t stride; - std::size_t offset; - std::size_t components; -}; - -std::unordered_map BuildTransformFeedback(const GraphicsInfo& info); - -} // namespace VideoCommon::Shader -- cgit v1.2.3 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. 
Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp --- src/common/thread_worker.h | 1 + src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +-- .../backend/spirv/emit_spirv_image.cpp | 11 +- .../backend/spirv/emit_spirv_warp.cpp | 2 +- src/shader_recompiler/file_environment.h | 2 +- src/shader_recompiler/frontend/ir/attribute.cpp | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- src/shader_recompiler/frontend/ir/condition.cpp | 6 +- src/shader_recompiler/frontend/ir/condition.h | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 +- .../frontend/ir/microinstruction.cpp | 16 +-- .../frontend/ir/microinstruction.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/program.cpp | 2 - src/shader_recompiler/frontend/ir/value.cpp | 4 +- src/shader_recompiler/frontend/ir/value.h | 2 +- .../frontend/maxwell/control_flow.cpp | 140 +++++++++------------ src/shader_recompiler/frontend/maxwell/decode.cpp | 10 +- .../maxwell/indirect_branch_table_track.cpp | 10 +- .../frontend/maxwell/structured_control_flow.cpp | 3 +- .../frontend/maxwell/translate/impl/double_add.cpp | 6 +- .../translate/impl/double_fused_multiply_add.cpp | 6 +- .../maxwell/translate/impl/double_multiply.cpp | 6 +- .../maxwell/translate/impl/floating_point_add.cpp | 6 +- .../translate/impl/floating_point_compare.cpp | 3 +- .../impl/floating_point_compare_and_set.cpp | 6 +- .../floating_point_conversion_floating_point.cpp | 6 +- .../impl/floating_point_conversion_integer.cpp | 11 +- .../impl/floating_point_fused_multiply_add.cpp | 6 +- .../translate/impl/floating_point_min_max.cpp | 6 
+- .../translate/impl/floating_point_multiply.cpp | 8 +- .../impl/floating_point_set_predicate.cpp | 6 +- .../translate/impl/floating_point_swizzled_add.cpp | 6 +- .../translate/impl/half_floating_point_add.cpp | 11 +- .../half_floating_point_fused_multiply_add.cpp | 11 +- .../impl/half_floating_point_multiply.cpp | 11 +- .../translate/impl/half_floating_point_set.cpp | 11 +- .../impl/half_floating_point_set_predicate.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +- .../maxwell/translate/impl/integer_add.cpp | 1 - .../impl/integer_floating_point_conversion.cpp | 4 +- .../maxwell/translate/impl/load_constant.cpp | 2 +- .../translate/impl/load_store_local_shared.cpp | 9 +- .../maxwell/translate/impl/load_store_memory.cpp | 4 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../translate/impl/texture_gather_swizzled.cpp | 2 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../maxwell/translate/impl/texture_query.cpp | 2 +- .../maxwell/translate/impl/video_set_predicate.cpp | 1 - .../ir_opt/collect_shader_info_pass.cpp | 20 +-- .../ir_opt/constant_propagation_pass.cpp | 49 ++++---- .../global_memory_to_storage_buffer_pass.cpp | 42 +++---- .../ir_opt/identity_removal_pass.cpp | 3 +- .../ir_opt/lower_fp16_to_fp32.cpp | 2 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 32 ++--- src/shader_recompiler/ir_opt/verification_pass.cpp | 4 +- src/tests/common/unique_function.cpp | 2 + src/video_core/CMakeLists.txt | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 ++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 5 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 2 - .../renderer_vulkan/vk_texture_cache.cpp | 2 +- 66 files changed, 308 insertions(+), 313 deletions(-) (limited to 'src/common') diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 0a975a869..cd0017726 100644 --- a/src/common/thread_worker.h +++ 
b/src/common/thread_worker.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 22639fe13..551bf1c58 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -196,6 +196,8 @@ else() $<$:-Werror=unused-but-set-parameter> $<$:-Werror=unused-but-set-variable> -Werror=unused-variable + + $<$:-fbracket-depth=1024> ) endif() diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index b738e00cc..0c114402b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -116,7 +117,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie const std::string_view def_name_view( def_name.data(), fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); - defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + defs[static_cast(i)] = + sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 32512a0e5..355cf0ca8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -16,7 +16,7 @@ namespace Shader::Backend::SPIRV { namespace { template -struct FuncTraits : FuncTraits {}; +struct FuncTraits {}; template struct FuncTraits { @@ -64,17 +64,20 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { template void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { using Traits = FuncTraits; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { if constexpr (is_first_arg_inst) { - SetDefinition(ctx, inst, inst, Arg>(ctx, 
inst->Arg(I))...); + SetDefinition( + ctx, inst, inst, + Arg>(ctx, inst->Arg(I))...); } else { - SetDefinition(ctx, inst, Arg>(ctx, inst->Arg(I))...); + SetDefinition( + ctx, inst, Arg>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - func(ctx, inst, Arg>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); } else { - func(ctx, Arg>(ctx, inst->Arg(I))...); + func(ctx, Arg>(ctx, inst->Arg(I))...); } } } @@ -94,14 +97,14 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { } void EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) \ case IR::Opcode::name: \ return Invoke<&Emit##name>(ctx, inst); #include "shader_recompiler/frontend/ir/opcodes.inc" #undef OPCODE } - throw LogicError("Invalid opcode {}", inst->Opcode()); + throw LogicError("Invalid opcode {}", inst->GetOpcode()); } Id TypeId(const EmitContext& ctx, IR::Type type) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index f0f8db8c3..815ca6299 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -43,11 +43,13 @@ public: // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); return; } - const IR::Opcode opcode{values[0]->Opcode()}; - if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { throw LogicError("Invalid PTP arguments"); } - auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }}; + auto read{[&](unsigned int a, unsigned int b) { + return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); + }}; const Id offsets{ ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)), @@ 
-297,13 +299,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { - const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +#ifdef _WIN32 #pragma optimize("", off) +#endif Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index c57bd291d..12a03ed6e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,7 +7,7 @@ namespace Shader::Backend::SPIRV { namespace { Id WarpExtract(EmitContext& ctx, Id value) { - const Id shift{ctx.Constant(ctx.U32[1], 5)}; + [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index 17640a622..71601f8fd 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -7,7 +7,7 @@ namespace Shader { -class FileEnvironment final : public Environment { +class FileEnvironment : public Environment { public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 4811242ea..7993e5c43 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ 
b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } - return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4; + return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4u; } std::string NameOf(Attribute attribute) { @@ -444,4 +444,4 @@ std::string NameOf(Attribute attribute) { return fmt::format("", static_cast(attribute)); } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index ec029dfd6..e1f0191f4 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map& ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); for (const Inst& inst : block) { - const Opcode op{inst.Opcode()}; + const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); if (TypeOf(op) != Type::Void) { ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index ec1659e2b..fc18ea2a2 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -12,10 +12,10 @@ namespace Shader::IR { std::string NameOf(Condition condition) { std::string ret; - if (condition.FlowTest() != FlowTest::T) { - ret = fmt::to_string(condition.FlowTest()); + if (condition.GetFlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.GetFlowTest()); } - const auto [pred, negated]{condition.Pred()}; + const auto [pred, negated]{condition.GetPred()}; if (!ret.empty()) { ret += '&'; } diff 
--git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 51c2f15cf..aa8597c60 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -30,11 +30,11 @@ public: auto operator<=>(const Condition&) const noexcept = default; - [[nodiscard]] IR::FlowTest FlowTest() const noexcept { + [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { return static_cast(flow_test); } - [[nodiscard]] std::pair Pred() const noexcept { + [[nodiscard]] std::pair GetPred() const noexcept { return {static_cast(pred), pred_negated != 0}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13eb2de4c..a2104bdb3 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { } U1 IREmitter::Condition(IR::Condition cond) { - const FlowTest flow_test{cond.FlowTest()}; - const auto [pred, is_negated]{cond.Pred()}; + const FlowTest flow_test{cond.GetFlowTest()}; + const auto [pred, is_negated]{cond.GetPred()}; return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 481202d94..ceb44e604 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -12,7 +12,7 @@ namespace Shader::IR { namespace { void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { - if (inst && inst->Opcode() != opcode) { + if (inst && inst->GetOpcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } @@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { } void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { - if 
(inst->Opcode() != expected_opcode) { + if (inst->GetOpcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } + +void AllocAssociatedInsts(std::unique_ptr& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique(); + } +} } // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { @@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void AllocAssociatedInsts(std::unique_ptr& associated_insts) { - if (!associated_insts) { - associated_insts = std::make_unique(); - } -} - void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 6658dc674..97dc91d85 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -46,7 +46,7 @@ public: } /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode Opcode() const noexcept { + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { return op; } @@ -95,7 +95,7 @@ public: requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; - std::memcpy(&ret, &flags, sizeof(ret)); + std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); return ret; } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1cb9db6c9..002dbf94e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) 
\ OpcodeMeta{ \ .name{#name_token}, \ - .type{type_token}, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 5f51aeb5f..89a17fb1b 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include #include diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 837c1b487..1e7ffb86d 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} bool Value::IsIdentity() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Identity; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; } bool Value::IsPhi() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Phi; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; } bool Value::IsEmpty() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e70..a0962863d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -94,7 +94,7 @@ public: } } - explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {} + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; using U1 = TypedValue; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 847bb1986..cb8ec7eaa 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -34,41 +34,37 @@ struct Compare { }; u32 BranchOffset(Location pc, Instruction inst) { - return pc.Offset() + inst.branch.Offset() + 8; + return pc.Offset() + static_cast(inst.branch.Offset()) + 8u; } void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - *new_block = Block{ - .begin{pc}, - .end{old_block->end}, - .end_class{old_block->end_class}, - .cond{old_block->cond}, - .stack{old_block->stack}, - .branch_true{old_block->branch_true}, - .branch_false{old_block->branch_false}, - .function_call{old_block->function_call}, - .return_block{old_block->return_block}, - .branch_reg{old_block->branch_reg}, - .branch_offset{old_block->branch_offset}, - .indirect_branches{std::move(old_block->indirect_branches)}, - }; - *old_block = Block{ - .begin{old_block->begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{std::move(old_block->stack)}, - .branch_true{new_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class, + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + 
old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; } Token OpcodeToken(Opcode opcode) { @@ -141,7 +137,7 @@ std::string NameOf(const Block& block) { void Stack::Push(Token token, Location target) { entries.push_back({ - .token{token}, + .token = token, .target{target}, }); } @@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(ObjectPool& block_pool, Location start_address) - : entrypoint{start_address}, labels{{ - .address{start_address}, - .block{block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}, - .stack{}, - }} {} + : entrypoint{start_address} { + Label& label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_}, program_start{start_address} { @@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati // Insert the function into the list if it doesn't exist const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; const bool exists{it != functions.end()}; - const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + const FunctionId call_id{exists ? 
static_cast(std::distance(functions.begin(), it)) + : functions.size()}; if (!exists) { functions.emplace_back(block_pool, cal_pc); } @@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, } // Create a virtual block and a conditional block Block* const conditional_block{block_pool.Create()}; - Block virtual_block{ - .begin{block->begin.Virtual()}, - .end{block->begin.Virtual()}, - .end_class{EndClass::Branch}, - .cond{cond}, - .stack{block->stack}, - .branch_true{conditional_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); // Impersonate the visited block with a virtual block @@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += brx_table->branch_offset; + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } @@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; block->indirect_branches.push_back({ - .block{branch}, - .address{target}, + .block = branch, + .address = target, }); } block->cond = IR::Condition{true}; @@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function if (label_it != function.labels.end()) { return label_it->block; } - Block* const 
new_block{block_pool.Create(Block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{stack}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}; + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; function.labels.push_back(Label{ .address{pc}, - .block{new_block}, + .block = new_block, .stack{std::move(stack)}, }); return new_block; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index bd85afa1e..932d19c1d 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) { bit >>= 1; } } - return MaskValue{.mask{mask}, .value{value}}; + return MaskValue{.mask = mask, .value = value}; } struct InstEncoding { @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode{Opcode::name}, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST @@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) { const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; if ((index & mask) == value) { encodings.at(element) = InstInfo{ - .high_mask{static_cast(encoding.mask_value.mask >> MASK_SHIFT)}, - .high_value{static_cast(encoding.mask_value.value >> MASK_SHIFT)}, - .opcode{encoding.opcode}, + .high_mask = static_cast(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, }; ++element; } diff --git 
a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp index 96453509d..008625cb3 100644 --- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -97,11 +97,11 @@ std::optional TrackIndirectBranchTable(Environment& env } const u32 imnmx_immediate{static_cast(imnmx.immediate.Value())}; return IndirectBranchTableInfo{ - .cbuf_index{cbuf_index}, - .cbuf_offset{cbuf_offset}, - .num_entries{imnmx_immediate + 1}, - .branch_offset{brx_offset}, - .branch_reg{brx_reg}, + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, }; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c804c2a8e..02cef2645 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -558,7 +558,6 @@ private: const Node label{goto_stmt->label}; const u32 label_id{label->id}; const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -566,7 +565,7 @@ private: Statement* const variable{pool.Create(Variable{}, label_id)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + body.insert(goto_stmt, *loop_stmt); Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; loop_stmt->children.push_front(*new_goto); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index ac1433dea..5a1b3a8fc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dadd.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index ff7321862..723841496 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dfma.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 3e83d1c95..4a49299a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const 
IR::F64& src_b) { const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dmul.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index b39950c84..b8c89810c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; if (sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index c02a40209..80109ca0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o } const fcmp{insn}; const IR::F32 zero{v.ir.Imm32(0.0f)}; - const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index c5417775e..b9f4ee0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(fset.pred)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 1e366fde0..035f8782a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; if (f2f.src_size != f2f.dst_size) { fp_control.rounding = CastFpRounding(f2f.rounding); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 21ae92be1..cf3cf1ba6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { fmz_mode = f2i.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None; } const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmz_mode}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { @@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { } else if (f2i.dest_format == DestFormat::I64) { handled_special_case = true; result = IR::U64{ - v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; } } if (!handled_special_case && is_signed) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; } } @@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { void TranslatorVisitor::F2I_reg(u64 insn) { union { + u64 raw; F2I base; BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 18561bc9c..fa2a7807b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ - .no_contraction{true}, - 
.rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 343d91032..8ae437528 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 72f0a18ae..06226b7ce 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode } const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { @@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) { fmul32i.sat != 0, fmul32i.cc != 0, false); } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp index 8ff9db843..5f93a1513 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - 
.fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const BooleanOp bop{fsetp.bop}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index e42921a21..7550a8d4c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) { const IR::U32 swizzle{ir.Imm32(static_cast(fswzadd.swizzle))}; const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{CastFpRounding(fswzadd.round)}, - .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 03e7bf047..f2738a93b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; @@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { BitField<20, 9, u64> low; } const hadd2{insn}; - const u32 imm{static_cast(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hadd2.low << 6) | static_cast((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hadd2.high << 22) | static_cast((hadd2.neg_high != 0 ? 1 : 0) << 31)}; HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 8b234bd6a..fd7986701 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; @@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) { BitField<57, 2, HalfPrecision> precision; } const hfma2{insn}; - const u32 imm{static_cast(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 
1 : 0) << 31)}; + const u32 imm{ + static_cast(hfma2.low << 6) | static_cast((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hfma2.high << 22) | static_cast((hfma2.neg_high != 0 ? 1 : 0) << 31)}; HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index 2451a6ef6..3f548ce76 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; @@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) { BitField<44, 1, u64> abs_a; } const hmul2{insn}; - const u32 imm{static_cast(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hmul2.low << 6) | static_cast((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hmul2.high << 22) | static_cast((hmul2.neg_high != 0 ? 
1 : 0) << 31)}; HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 7f1f4b88c..cca5b831f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hset2.pred)}; @@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) { BitField<20, 9, u64> low; } const hset2{insn}; - const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hset2.low << 6) | static_cast((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | static_cast((hset2.neg_high != 0 ? 
1 : 0) << 31)}; HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, Swizzle::H1_H0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 3e2a23c92..b3931dae3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; @@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) { BitField<20, 9, u64> low; } const hsetp2{insn}; - const u32 imm{static_cast(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast(hsetp2.low << 6) | + static_cast((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hsetp2.high << 22) | + static_cast((hsetp2.neg_high != 0 ? 
1 : 0) << 31)}; HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, hsetp2.h_and != 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 30b570ce4..88bbac0a5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { } const IR::Value result{ir.UnpackUint2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { } const IR::Value result{ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { const auto [binding, offset_value]{CbufAddr(insn)}; const bool unaligned{cbuf.unaligned != 0}; const u32 offset{offset_value.U32()}; - const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; @@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 sign_bit{static_cast(imm.is_negative != 0 ? 
(1ULL << 31) : 0)}; const u32 value{static_cast(imm.value) << 12}; return ir.Imm32(Common::BitCast(value | sign_bit)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 1493e1815..8ffd84867 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } const iadd{insn}; const bool po{iadd.three_for_po == 3}; - const bool neg_a{!po && iadd.neg_a != 0}; if (!po && iadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e8b5ae1d2..5a0fc36a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const IR::Value vector{v.ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; ++i) { - v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) { } } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index ae3ecea32..2300088e3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -50,7 +50,7 @@ void 
TranslatorVisitor::LDC(u64 insn) { } const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { - X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 68963c8ea..e24b49721 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -40,7 +40,6 @@ std::pair GetSize(u64 insn) { BitField<48, 3, Size> size; } const encoding{insn}; - const Size nnn = encoding.size; switch (encoding.size) { case Size::U8: return {8, false}; @@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } X(dest, ir.LoadLocal(word_offset)); @@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) { break; case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } for (int element = 0; element < bit_size / 32; ++element) { - X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast(element))}); } break; } @@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(reg, bit_size / 32)) { + if (!IR::IsAligned(reg, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned source register"); } ir.WriteLocal(word_offset, src); diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 71688b1d7..36c5cff2f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b2da079f9..95d416586 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, if (tex.dc != 0) { value = element < 3 ? 
IR::F32{sample} : v.ir.Imm32(1.0f); } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; + value = IR::F32{v.ir.CompositeExtract(sample, static_cast(element))}; } v.F(dest_reg, value); ++dest_reg; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index d5fda20f4..fe2c7db85 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{ R | G | B | A, // }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index beab515ad..2ba9c1018 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -37,7 +37,7 @@ union Encoding { BitField<36, 13, u64> cbuf_offset; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 623b8fc23..0863bdfcd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -56,7 +56,7 @@ union Encoding { BitField<53, 4, u64> 
encoding; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index 8c7e04bca..0459e5473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { if (((txq.mask >> element) & 1) == 0) { continue; } - v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast(element))}); ++dest_reg; } } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index af13b3fcc..ec5e74f6d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast(vsetp.src_a_selector)}; - const u32 b_selector{is_b_imm ? 
0U : static_cast(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 1c03ee82a..edbfcd308 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { @@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { auto& cbufs{info.constant_buffer_descriptors}; cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), ConstantBufferDescriptor{ - .index{index}, - .count{1}, + .index = index, + .count = 1, }); } @@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } void VisitUsages(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: @@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: case IR::Opcode::UndefU8: @@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: case IR::Opcode::UndefU16: @@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case 
IR::Opcode::UndefU64: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS8: @@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::DemoteToHelperInvocation: info.uses_demote_to_helper_invocation = true; break; @@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } else { throw NotImplementedException("Constant buffer with non-immediate index"); } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; @@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } void VisitFpModifiers(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::FPAdd16: case IR::Opcode::FPFma16: case IR::Opcode::FPMul16: @@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) { info.stores_position |= header.vtg.omap_systemb.position != 0; } } - } // Anonymous namespace void CollectShaderInfoPass(Environment& env, IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 1720d7a09..61fbbe04c 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const rhs_inst{rhs.InstRecursive()}; - if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg(lhs), Arg(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (!is_lhs_immediate && 
is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; - if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg(rhs), Arg(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { return false; } IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; - if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) { + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { return false; } if (lhs_shl->Arg(0).IsImmediate()) { @@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; - if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) { + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { return false; } if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { @@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; - if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } - if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { @@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. 
const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && - a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + return a->GetOpcode() == IR::Opcode::GetCbufU32 && + b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && + a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; IR::Inst* op_b{inst.Arg(1).InstRecursive()}; @@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) { return; } // It's also possible a value is being added to a cbuf and then subtracted - if (op_b->Opcode() == IR::Opcode::IAdd32) { + if (op_b->GetOpcode() == IR::Opcode::IAdd32) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbufU32) { + if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; - if (op_a->Opcode() != IR::Opcode::IAdd32) { + if (op_a->GetOpcode() != IR::Opcode::IAdd32) { return; } IR::Value add_op_a{op_a->Arg(0)}; @@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const lhs_op{lhs_value.InstRecursive()}; IR::Inst* const rhs_op{rhs_value.InstRecursive()}; - if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) { + if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || + rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { return; } const IR::Value recip_source{rhs_op->Arg(0)}; @@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const attr_a{recip_source.InstRecursive()}; IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; - if (attr_a->Opcode() != IR::Opcode::GetAttribute || - attr_b->Opcode() != IR::Opcode::GetAttribute) { + if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || + attr_b->GetOpcode() != IR::Opcode::GetAttribute) { return; } if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { @@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) { return; } IR::Inst* 
const arg{value.InstRecursive()}; - if (arg->Opcode() == IR::Opcode::LogicalNot) { + if (arg->GetOpcode() == IR::Opcode::LogicalNot) { inst.ReplaceUsesWith(arg->Arg(0)); } } @@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } if constexpr (op == IR::Opcode::BitCastF32U32) { - if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { // Replace the bitcast with a typed constant buffer read inst.ReplaceOpcode(IR::Opcode::GetCbufF32); inst.SetArg(0, arg_inst->Arg(0)); @@ -338,7 +341,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } @@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { template IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence) { using Traits = LambdaTraits; - return IR::Value{func(Arg>(inst.Arg(I))...)}; + return IR::Value{func(Arg>(inst.Arg(I))...)}; } void FoldBranchConditional(IR::Inst& inst) { @@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) { return; } const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) { const IR::Value true_label{inst.Arg(1)}; const IR::Value false_label{inst.Arg(2)}; // Remove negation on the conditional (take the parameter out of LogicalNot) and swap @@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) { std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; - if 
(inst->Opcode() == construct) { + if (inst->GetOpcode() == construct) { return inst->Arg(first_index); } - if (inst->Opcode() != insert) { + if (inst->GetOpcode() != insert) { return std::nullopt; } IR::Value value_index{inst->Arg(2)}; @@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser } void ConstantPropagation(IR::Block& block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); case IR::Opcode::GetPred: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0858a0bdd..90a65dd16 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -57,7 +57,7 @@ struct StorageInfo { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemoryWrite(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalS16: @@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -164,7 +164,7 @@ void DiscardGlobalMemory(IR::Block& 
block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); } } @@ -184,7 +184,7 @@ std::optional TrackLowAddress(IR::Inst* inst) { // This address is expected to either be a PackUint2x32 or a IAdd64 IR::Inst* addr_inst{addr.InstRecursive()}; s32 imm_offset{0}; - if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { // If it's an IAdd64, get the immediate offset it is applying and grab the address // instruction. This expects for the instruction to be canonicalized having the address on // the first argument and the immediate offset on the second one. @@ -200,7 +200,7 @@ std::optional TrackLowAddress(IR::Inst* inst) { addr_inst = iadd_addr.Inst(); } // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) { return std::nullopt; } // PackUint2x32 is expected to be generated from a vector @@ -210,20 +210,20 @@ std::optional TrackLowAddress(IR::Inst* inst) { } // This vector is expected to be a CompositeConstructU32x2 IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { + if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } // Grab the first argument from the CompositeConstructU32x2, this is the low address. 
return LowAddrInfo{ .value{IR::U32{vector_inst->Arg(0)}}, - .imm_offset{imm_offset}, + .imm_offset = imm_offset, }; } /// Tries to track the storage buffer address used by a global memory instruction std::optional Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ - .index{0}, - .offset_begin{0x110}, - .offset_end{0x610}, + .index = 0, + .offset_begin = 0x110, + .offset_end = 0x610, }; // Track the low address of the instruction const std::optional low_addr_info{TrackLowAddress(&inst)}; @@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) info.set.insert(*storage_buffer); info.to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{&inst}, - .block{&block}, + .inst = &inst, + .block = &block, }); } @@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer /// Replace a global memory load instruction with its storage buffer equivalent void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; inst.ReplaceUsesWith(value); @@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, /// Replace a global memory write 
instruction with its storage buffer equivalent void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); inst.Invalidate(); @@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } } } // Anonymous namespace @@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ - .cbuf_index{storage_buffer.index}, - .cbuf_offset{storage_buffer.offset}, - .count{1}, + .cbuf_index = storage_buffer.index, + .cbuf_offset = storage_buffer.offset, + .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); ++storage_index; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 8790b48f2..38af72dfe 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ 
-22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) { inst->SetArg(i, arg.Inst()->Arg(0)); } } - if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { + if (inst->GetOpcode() == IR::Opcode::Identity || + inst->GetOpcode() == IR::Opcode::Void) { to_invalidate.push_back(&*inst); inst = block->Instructions().erase(inst); } else { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0d2c91ed6..52576b07f 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) { void LowerFp16ToFp32(IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); + inst.ReplaceOpcode(Replace(inst.GetOpcode())); } } } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index ca36253d1..346fcc377 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { - return inst.Opcode() == IR::Opcode::Phi; + return inst.GetOpcode() == IR::Opcode::Phi; } enum class Status { @@ -278,7 +278,7 @@ private: }; void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { pass.WriteVariable(reg, block, inst.Arg(1)); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 290ce4179..c8aee3d3d 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -30,7 +30,7 @@ 
struct TextureInst { using TextureInstVector = boost::container::small_vector; IR::Opcode IndexedInstruction(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BoundImageSampleImplicitLod: return IR::Opcode::ImageSampleImplicitLod; @@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { } bool IsBindless(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: @@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: return false; default: - throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); } } @@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) { } std::optional TryGetConstBuffer(const IR::Inst* inst) { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = *track_addr; } else { addr = ConstBufferAddr{ - .index{env.TextureBoundBuffer()}, - .offset{inst.Arg(0).U32()}, + .index = env.TextureBoundBuffer(), + .offset = inst.Arg(0).U32(), }; } return TextureInst{ .cbuf{addr}, - .inst{&inst}, - .block{block}, + .inst = &inst, + .block = block, }; } @@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags()}; - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); @@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) { u32 
index; if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .type = flags.type, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } inst->SetArg(0, IR::Value{index}); diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 4080b37cc..dbec96d84 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,14 +14,14 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Program& program) { for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { - if (inst.Opcode() == IR::Opcode::Phi) { + if (inst.GetOpcode() == IR::Opcode::Phi) { // Skip validation on phi nodes continue; } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; - const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; + const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; if (!IR::AreTypesCompatible(t1, t2)) { throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); } diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index ac9912738..aa6e86593 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp @@ -17,10 +17,12 @@ struct Noisy { Noisy& operator=(Noisy&& rhs) noexcept { state = "Move assigned"; rhs.state = "Moved away"; + return *this; } Noisy(const Noisy&) : state{"Copied constructed"} {} Noisy& operator=(const Noisy&) { state = "Copied assigned"; + return *this; } std::string state; diff --git a/src/video_core/CMakeLists.txt 
b/src/video_core/CMakeLists.txt index 71b07c194..3166a69dc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -203,7 +203,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 893258b4a..57e2d569c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .dynamicStateCount = static_cast(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, @@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!spv_modules[stage]) { continue; } - [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), - .module = *spv_modules[stage], - .pName = "main", - .pSpecializationInfo = nullptr, - }); + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags 
= 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 23bf84a92..fcebb8f6e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -u64 MakeCbufKey(u32 index, u32 offset) { +static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast(index) << 32) | offset; } @@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, + .fixed_state_point_size{}, }; } @@ -748,7 +749,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Environment& env{*envs[env_index]}; ++env_index; - const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index b2dcd74ab..991afe521 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#pragma once - #include #include diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e42b091c5..70328680d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] std::vector TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies( std::span copies, size_t buffer_offset) { std::vector result(copies.size()); std::ranges::transform( -- cgit v1.2.3 From 373f75d944473731408d7a72c967d5c4b37af5bb Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:22:31 -0400 Subject: shader: Add shader loop safety check settings Also add a setting for enable Nsight Aftermath. --- src/common/settings.h | 3 + .../backend/glasm/emit_glasm_instructions.h | 2 + .../backend/glasm/emit_glasm_not_implemented.cpp | 8 +++ .../backend/spirv/emit_spirv_context_get_set.cpp | 24 +++++--- .../backend/spirv/emit_spirv_instructions.h | 2 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 ++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../frontend/maxwell/structured_control_flow.cpp | 42 ++++++++++++-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 66 +++++++++++++++------- src/video_core/vulkan_common/vulkan_device.cpp | 4 +- src/yuzu/configuration/config.cpp | 4 ++ src/yuzu/configuration/configure_debug.cpp | 8 +++ src/yuzu/configuration/configure_debug.ui | 26 +++++++++ src/yuzu_cmd/config.cpp | 2 + src/yuzu_cmd/default_ini.h | 8 +++ 16 files changed, 183 insertions(+), 35 deletions(-) (limited to 'src/common') diff --git a/src/common/settings.h b/src/common/settings.h index ce1bc647d..ac0590690 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -308,6 +308,9 @@ struct Values { // 
Renderer Setting renderer_backend{RendererBackend::OpenGL, "backend"}; BasicSetting renderer_debug{false, "debug"}; + BasicSetting enable_nsight_aftermath{false, "nsight_aftermath"}; + BasicSetting disable_shader_loop_safety_checks{false, + "disable_shader_loop_safety_checks"}; Setting vulkan_device{0, "vulkan_device"}; Setting resolution_factor{1, "resolution_factor"}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index c9f4826ce..fef9ff9be 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -42,6 +42,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 95bcbd750..60735fe31 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -153,6 +153,14 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } +void EmitSetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } diff --git 
a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 442a958a5..42fff74e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -163,35 +163,43 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde } // Anonymous namespace void EmitGetRegister(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetRegister(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetPred(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetPred(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetGotoVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetGotoVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetIndirectBranchVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetIndirectBranchVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); +} + +void EmitSetLoopSafetyVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetLoopSafetyVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); } Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h 
b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 1181e7b4f..e3e5b03fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -43,6 +43,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index e9fd41237..6c37af5e7 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -125,6 +125,12 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +void IREmitter::SetPred(IR::Pred pred, const U1& value) { + if (pred != IR::Pred::PT) { + Inst(Opcode::SetPred, pred, value); + } +} + U1 IREmitter::GetGotoVariable(u32 id) { return Inst(Opcode::GetGotoVariable, id); } @@ -141,8 +147,12 @@ void IREmitter::SetIndirectBranchVariable(const U32& value) { Inst(Opcode::SetIndirectBranchVariable, value); } -void IREmitter::SetPred(IR::Pred pred, const U1& value) { - Inst(Opcode::SetPred, pred, value); +U32 IREmitter::GetLoopSafetyVariable(u32 id) { + return Inst(Opcode::GetLoopSafetyVariable, id); +} + +void IREmitter::SetLoopSafetyVariable(u32 id, const U32& counter) { + Inst(Opcode::SetLoopSafetyVariable, id, counter); } U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h 
b/src/shader_recompiler/frontend/ir/ir_emitter.h index bb3500c54..7caab1f61 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -55,6 +55,9 @@ public: [[nodiscard]] U32 GetIndirectBranchVariable(); void SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetLoopSafetyVariable(u32 id); + void SetLoopSafetyVariable(u32 id, const U32& counter); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8a8d0d759..e87aeddd5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -32,6 +32,8 @@ OPCODE(GetGotoVariable, U1, U32, OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetIndirectBranchVariable, U32, ) OPCODE(SetIndirectBranchVariable, Void, U32, ) +OPCODE(GetLoopSafetyVariable, U32, U32, ) +OPCODE(SetLoopSafetyVariable, Void, U32, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c1e0646e6..b2b8c492a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -9,11 +9,13 @@ #include #include #include +#include #include #include +#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -739,8 +741,25 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - if (current_block) { - 
current_block->AddBranch(loop_header_block); + const u32 this_loop_id{loop_id++}; + + if (Settings::values.disable_shader_loop_safety_checks) { + if (current_block) { + current_block->AddBranch(loop_header_block); + } + } else { + IR::Block* const init_block{block_pool.Create(inst_pool)}; + IR::IREmitter ir{*init_block}; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + if (current_block) { + current_block->AddBranch(init_block); + } + init_block->AddBranch(loop_header_block); + + auto& init_node{syntax_list.emplace_back()}; + init_node.type = IR::AbstractSyntaxNode::Type::Block; + init_node.data.block = init_block; } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -758,7 +777,16 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const IR::U32 old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; + const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; + ir.SetLoopSafetyVariable(this_loop_id, new_counter); + + const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; + cond = ir.LogicalAnd(cond, safety_cond); + } + cond = ir.ConditionRef(cond); IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -863,8 +891,14 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - // TODO: Make this constexpr when std::vector is constexpr + u32 loop_id{}; + +// TODO: C++20 Remove this when all compilers support constexpr std::vector +#if __cpp_lib_constexpr_vector >= 201907 + static constexpr Flow::Block dummy_flow_block; +#else const Flow::Block dummy_flow_block; +#endif }; } // Anonymous namespace diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp 
b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index e54499ba5..a4ba393ef 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,73 +48,91 @@ struct GotoVariable : FlagTag { u32 index; }; +struct LoopSafetyVariable { + LoopSafetyVariable() = default; + explicit LoopSafetyVariable(u32 index_) : index{index_} {} + + auto operator<=>(const LoopSafetyVariable&) const noexcept = default; + + u32 index; +}; + struct IndirectBranchVariable { auto operator<=>(const IndirectBranchVariable&) const noexcept = default; }; -using Variant = std::variant; -using ValueMap = boost::container::flat_map>; +using Variant = + std::variant; +using ValueMap = boost::container::flat_map; struct DefTable { - const IR::Value& Def(IR::Block* block, IR::Reg variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Reg variable) { return block->SsaRegValue(variable); } - void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { block->SetSsaRegValue(variable, value); } - const IR::Value& Def(IR::Block* block, IR::Pred variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Pred variable) { return preds[IR::PredIndex(variable)][block]; } - void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { preds[IR::PredIndex(variable)].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, GotoVariable variable) noexcept { + const IR::Value& Def(IR::Block* block, GotoVariable variable) { return goto_vars[variable.index][block]; } - void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { goto_vars[variable.index].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* 
block, IndirectBranchVariable) noexcept { + const IR::Value& Def(IR::Block* block, LoopSafetyVariable variable) { + return loop_safety_vars[variable.index][block]; + } + void SetDef(IR::Block* block, LoopSafetyVariable variable, const IR::Value& value) { + loop_safety_vars[variable.index].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { return indirect_branch_var[block]; } - void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { indirect_branch_var.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, ZeroFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, ZeroFlagTag) { return zero_flag[block]; } - void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { zero_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, SignFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, SignFlagTag) { return sign_flag[block]; } - void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { sign_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, CarryFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, CarryFlagTag) { return carry_flag[block]; } - void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { carry_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, OverflowFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, OverflowFlagTag) { return overflow_flag[block]; } - void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, OverflowFlagTag, const 
IR::Value& value) { overflow_flag.insert_or_assign(block, value); } std::array preds; boost::container::flat_map goto_vars; + boost::container::flat_map loop_safety_vars; ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; @@ -134,6 +152,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(const LoopSafetyVariable&) noexcept { + return IR::Opcode::UndefU32; +} + IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } @@ -315,6 +337,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetLoopSafetyVariable: + pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(0)); + break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); break; @@ -343,6 +368,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetLoopSafetyVariable: + inst.ReplaceUsesWith(pass.ReadVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetIndirectBranchVariable: inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); break; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8eb37a77a..bf063c047 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -467,7 +467,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (nv_device_diagnostics_config) { + if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { nsight_aftermath_tracker = 
std::make_unique(); diagnostics_nv = { @@ -781,7 +781,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); - if (Settings::values.renderer_debug) { + if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a5e032959..dc69574a9 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -824,6 +824,8 @@ void Config::ReadRendererValues() { if (global) { ReadBasicSetting(Settings::values.renderer_debug); + ReadBasicSetting(Settings::values.enable_nsight_aftermath); + ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); @@ -1353,6 +1355,8 @@ void Config::SaveRendererValues() { if (global) { WriteBasicSetting(Settings::values.renderer_debug); + WriteBasicSetting(Settings::values.enable_nsight_aftermath); + WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 8fceb3878..f7e29dbd7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -45,8 +45,13 @@ void ConfigureDebug::SetConfiguration() { ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_cpu_debugging->setEnabled(runtime_lock); ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); + ui->enable_nsight_aftermath->setEnabled(runtime_lock); + ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); 
ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); + ui->disable_loop_safety_checks->setEnabled(runtime_lock); + ui->disable_loop_safety_checks->setChecked( + Settings::values.disable_shader_loop_safety_checks.GetValue()); ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); } @@ -61,6 +66,9 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); + Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); + Settings::values.disable_shader_loop_safety_checks = + ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); Settings::values.extended_logging = ui->extended_logging->isChecked(); Debugger::ToggleConsole(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 1260ad6f0..c8baf2921 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -125,6 +125,16 @@ + + + + When checked, it enables Nsight Aftermath crash dumps + + + Enable Nsight Aftermath + + + @@ -138,6 +148,16 @@ + + + + When checked, it executes shaders without loop logic changes + + + Disable Loop safety checks + + + @@ -252,11 +272,17 @@ log_filter_edit toggle_console + extended_logging open_log_button homebrew_args_edit enable_graphics_debugging + enable_nsight_aftermath + disable_macro_jit + disable_loop_safety_checks reporting_services quest_flag + use_debug_asserts + use_auto_stub diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 3e22fee37..763df6dd6 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -444,6 +444,8 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", 
Settings::values.renderer_backend); ReadSetting("Renderer", Settings::values.renderer_debug); + ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); + ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.vulkan_device); ReadSetting("Renderer", Settings::values.fullscreen_mode); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 88d33ecab..a6ca7b6cd 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -221,6 +221,14 @@ backend = # 0 (default): Disabled, 1: Enabled debug = +# Enable Nsight Aftermath crash dumps +# 0 (default): Disabled, 1: Enabled +nsight_aftermath = + +# Disable shader loop safety checks, executing the shader without loop logic changes +# 0 (default): Disabled, 1: Enabled +disable_shader_loop_safety_checks = + # Which Vulkan physical device to use (defaults to 0) vulkan_device = -- cgit v1.2.3 From 61cd7dd30128633b656ce3264da74bef1ba00bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 14 Jun 2021 02:27:49 -0300 Subject: shader: Add logging --- src/common/logging/filter.cpp | 4 ++++ src/common/logging/types.h | 4 ++++ src/shader_recompiler/backend/glasm/emit_glasm.cpp | 2 +- .../backend/glasm/emit_glasm_context_get_set.cpp | 6 ++++-- src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 4 ++-- .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp | 8 ++++---- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 8 ++++---- src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp | 6 +++--- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 4 ++-- src/shader_recompiler/backend/spirv/emit_spirv_special.cpp | 4 ++-- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 +- .../maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | 2 +- .../frontend/maxwell/translate/impl/move_special_register.cpp | 8 ++++---- 
src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 2 +- 15 files changed, 38 insertions(+), 28 deletions(-) (limited to 'src/common') diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 4f2cc29e1..f055f0e11 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -144,6 +144,10 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Render, Software) \ SUB(Render, OpenGL) \ SUB(Render, Vulkan) \ + CLS(Shader) \ + SUB(Shader, SPIRV) \ + SUB(Shader, GLASM) \ + SUB(Shader, GLSL) \ CLS(Audio) \ SUB(Audio, DSP) \ SUB(Audio, Sink) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 88b0e9c01..7ad0334fc 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -114,6 +114,10 @@ enum class Class : u8 { Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend Render_Vulkan, ///< Vulkan backend + Shader, ///< Shader recompiler + Shader_SPIRV, ///< Shader SPIR-V code generation + Shader_GLASM, ///< Shader GLASM code generation + Shader_GLSL, ///< Shader GLSL code generation Audio, ///< Audio emulation Audio_DSP, ///< The HLE implementation of the DSP Audio_Sink, ///< Emulator audio output backend diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index fc01797b6..832b4fd40 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -253,7 +253,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } if (!ctx.reg_alloc.IsEmpty()) { - // LOG_WARNING ...; + LOG_WARNING(Shader_GLASM, "Register leak after generating code"); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index c1df7a342..20b925877 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ 
b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -145,14 +145,16 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { ctx.Add("MOV.F result.layer.x,{};", value); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, + "Layer stored outside of geometry shader not supported by device"); } break; case IR::Attribute::ViewportIndex: if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { ctx.Add("MOV.F result.viewport.x,{};", value); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, + "Viewport stored outside of geometry shader not supported by device"); } break; case IR::Attribute::PointSize: diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 81d5fe72c..09e3a9b82 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -139,12 +139,12 @@ void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR:: std::string GradOffset(const IR::Value& offset) { if (offset.IsImmediate()) { - // LOG_WARNING immediate + LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); return ""; } IR::Inst* const vector{offset.InstRecursive()}; if (!vector->AreAllArgsImmediates()) { - // LOG_WARNING elements not immediate + LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); return ""; } switch (vector->NumArgs()) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 60735fe31..a487a0744 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -115,7 +115,7 @@ void EmitEmitVertex(EmitContext& ctx, ScalarS32 
stream) { void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { if (!stream.IsImmediate()) { - // LOG_WARNING not immediate + LOG_WARNING(Shader_GLASM, "Stream is not immediate"); } ctx.reg_alloc.Consume(stream); ctx.Add("ENDPRIM;"); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 8cec5ee7e..544d475b4 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -115,7 +115,7 @@ void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { if (ctx.profile.support_derivative_control) { ctx.Add("DDX.FINE {}.x,{};", inst, p); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); ctx.Add("DDX {}.x,{};", inst, p); } } @@ -124,7 +124,7 @@ void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { if (ctx.profile.support_derivative_control) { ctx.Add("DDY.FINE {}.x,{};", inst, p); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); ctx.Add("DDY {}.x,{};", inst, p); } } @@ -133,7 +133,7 @@ void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { if (ctx.profile.support_derivative_control) { ctx.Add("DDX.COARSE {}.x,{};", inst, p); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); ctx.Add("DDX {}.x,{};", inst, p); } } @@ -142,7 +142,7 @@ void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { if (ctx.profile.support_derivative_control) { ctx.Add("DDY.COARSE {}.x,{};", inst, p); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); ctx.Add("DDY {}.x,{};", inst, p); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index cba420cda..14a99750d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -294,7 +294,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit Id main_func) { const Info& info{program.info}; if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { - // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); + LOG_ERROR(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { if (profile.support_fp32_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -307,7 +307,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); } else { - // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support"); + LOG_WARNING(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); } } if (!profile.support_separate_denorm_behavior) { @@ -315,7 +315,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit return; } if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { - // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader"); + LOG_ERROR(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -328,7 +328,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); } else { - // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support"); + LOG_WARNING(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); } } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp 
b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 053800eb7..9af8bb9e1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -73,7 +73,7 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& const auto [scope, semantics]{AtomicArgs(ctx)}; return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; @@ -140,7 +140,7 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { const auto [scope, semantics]{AtomicArgs(ctx)}; return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer_1{SharedPointer(ctx, offset, 0)}; const Id pointer_2{SharedPointer(ctx, offset, 1)}; const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; @@ -266,7 +266,7 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const const auto [scope, semantics]{AtomicArgs(ctx)}; return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp 
b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index cf842e1e0..647804814 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -39,7 +39,7 @@ public: } const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { - // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); + LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring"); return; } const IR::Opcode opcode{values[0]->GetOpcode()}; @@ -442,7 +442,7 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { const auto info{inst->Flags()}; if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { - // LOG_WARNING(..., "Typeless image read not supported by host"); + LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); return ctx.ConstantNull(ctx.U32[4]); } return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 072a3b1bd..9e7eb3cb1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -131,7 +131,7 @@ void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { if (stream.IsImmediate()) { ctx.OpEmitStreamVertex(ctx.Def(stream)); } else { - // LOG_WARNING(..., "EmitVertex's stream is not constant"); + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); ctx.OpEmitStreamVertex(ctx.u32_zero_value); } // Restore fixed pipeline point size after emitting the vertex @@ -142,7 +142,7 @@ void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { if 
(stream.IsImmediate()) { ctx.OpEndStreamPrimitive(ctx.Def(stream)); } else { - // LOG_WARNING(..., "EndPrimitive's stream is not constant"); + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); ctx.OpEndStreamPrimitive(ctx.u32_zero_value); } } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6c37af5e7..d2ac2acac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -270,7 +270,7 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); case FlowTest::FCSM_TR: - // LOG_WARNING(ShaderDecompiler, "FCSM_TR CC State (Stubbed)"); + LOG_WARNING(Shader, "(STUBBED) FCSM_TR"); return ir.Imm1(false); case FlowTest::CSM_TA: case FlowTest::CSM_TR: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index edd6220a8..9b85f8059 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -46,7 +46,7 @@ void TranslatorVisitor::ISBERD(u64 insn) { if (isberd.shift != Shift::Default) { throw NotImplementedException("Shift {}", isberd.shift.Value()); } - // LOG_WARNING(..., "ISBERD is stubbed"); + LOG_WARNING(Shader, "(STUBBED) called"); X(isberd.dest_reg, X(isberd.src_reg)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index fe3cdfa96..20cb2674e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -118,7 
+118,7 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: - // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); return ir.Imm32(0x00ff'0000); case SpecialRegister::SR_TID: { const IR::Value tid{ir.LocalInvocationId()}; @@ -140,10 +140,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: - // LOG_WARNING(..., "SR_WSCALEFACTOR_XY is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: - // LOG_WARNING(..., "SR_WSCALEFACTOR_Z is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); @@ -160,7 +160,7 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_Y_DIRECTION: return ir.BitCast(ir.YDirection()); case SpecialRegister::SR_AFFINITY: - // LOG_WARNING(..., "SR_AFFINITY is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); return ir.Imm32(0); // This is the default value hardware returns. 
default: throw NotImplementedException("S2R special register {}", special_register); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 0793611ff..7ce370f09 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -48,7 +48,7 @@ void TranslatorVisitor::VOTE(u64 insn) { } void TranslatorVisitor::VOTE_vtg(u64) { - // LOG_WARNING(ShaderDecompiler, "VOTE.VTG: Stubbed!"); + LOG_WARNING(Shader, "(STUBBED) called"); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 1152d66ddd4e7b29b53e01990fef77e4cff20e24 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:28:48 -0400 Subject: general: Add setting shader_backend GLASM is getting good enough that we can move it out of advanced graphics settings. This removes the setting `use_assembly_shaders`, opting for an enum class `shader_backend`. This comes with the benefits that it is extensible for additional shader backends besides GLSL and GLASM, and this will work better with a QComboBox. Qt removes the related assembly shader setting from the Advanced Graphics section and places it as a new QComboBox in the API Settings group. This will replace the Vulkan device selector when OpenGL is selected. Additionally, mark all of the custom anisotropic filtering settings as "WILL BREAK THINGS", as that is the case with a select few games.
--- src/common/settings.cpp | 4 +- src/common/settings.h | 8 +- src/core/telemetry_session.cpp | 4 +- src/video_core/renderer_opengl/gl_device.cpp | 10 +- src/yuzu/configuration/config.cpp | 7 +- src/yuzu/configuration/config.h | 3 +- src/yuzu/configuration/configure_graphics.cpp | 76 ++++++++----- src/yuzu/configuration/configure_graphics.h | 4 +- src/yuzu/configuration/configure_graphics.ui | 118 ++++++++++++++++----- .../configuration/configure_graphics_advanced.cpp | 7 -- .../configuration/configure_graphics_advanced.h | 1 - .../configuration/configure_graphics_advanced.ui | 18 +--- src/yuzu_cmd/config.cpp | 2 +- src/yuzu_cmd/default_ini.h | 7 +- 14 files changed, 182 insertions(+), 87 deletions(-) (limited to 'src/common') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bf5514386..66268ea0f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -57,7 +57,7 @@ void LogSettings() { log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); - log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); + log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); @@ -140,7 +140,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); - values.use_assembly_shaders.SetGlobal(true); + values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.use_caches_gc.SetGlobal(true); diff --git 
a/src/common/settings.h b/src/common/settings.h index ac0590690..32dfb1d9f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -24,6 +24,12 @@ enum class RendererBackend : u32 { Vulkan = 1, }; +enum class ShaderBackend : u32 { + GLSL = 0, + GLASM = 1, + SPIRV = 2, +}; + enum class GPUAccuracy : u32 { Normal = 0, High = 1, @@ -334,7 +340,7 @@ struct Values { Setting accelerate_astc{true, "accelerate_astc"}; Setting use_vsync{true, "use_vsync"}; BasicSetting disable_fps_limit{false, "disable_fps_limit"}; - Setting use_assembly_shaders{false, "use_assembly_shaders"}; + Setting shader_backend{ShaderBackend::GLASM, "shader_backend"}; Setting use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting use_fast_gpu_time{true, "use_fast_gpu_time"}; Setting use_caches_gc{false, "use_caches_gc"}; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 066cb23e4..422de3a7d 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -233,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); - AddField(field_type, "Renderer_UseAssemblyShaders", - Settings::values.use_assembly_shaders.GetValue()); + AddField(field_type, "Renderer_ShaderBackend", + static_cast(Settings::values.shader_backend.GetValue())); AddField(field_type, "Renderer_UseAsynchronousShaders", Settings::values.use_asynchronous_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue()); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6818951f2..c4eeed53b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,9 +172,10 
@@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && - GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_assembly_shaders = + Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && + GLAD_GL_NV_transform_feedback2; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && @@ -187,7 +188,8 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { + if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index dc69574a9..52b3ed02e 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -814,7 +814,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); - ReadGlobalSetting(Settings::values.use_assembly_shaders); + ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); ReadGlobalSetting(Settings::values.use_caches_gc); @@ -1345,7 +1345,10 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_nvdec_emulation); 
WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); - WriteGlobalSetting(Settings::values.use_assembly_shaders); + WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), + static_cast(Settings::values.shader_backend.GetValue(global)), + static_cast(Settings::values.shader_backend.GetDefault()), + Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); WriteGlobalSetting(Settings::values.use_caches_gc); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 96f9b6de1..4bbb9f1cd 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -180,5 +180,6 @@ private: // These metatype declarations cannot be in common/settings.h because core is devoid of QT Q_DECLARE_METATYPE(Settings::CPUAccuracy); -Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::GPUAccuracy); +Q_DECLARE_METATYPE(Settings::RendererBackend); +Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 4d5b4c0e6..463448dbf 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -26,19 +26,25 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->setupUi(this); + ui->backend->addItem(QStringLiteral("GLSL")); + ui->backend->addItem(tr("GLASM (NVIDIA Only)")); + ui->backend->addItem(QStringLiteral("SPIR-V")); + SetupPerGameUI(); SetConfiguration(); connect(ui->api, qOverload(&QComboBox::currentIndexChanged), this, [this] { - UpdateDeviceComboBox(); + UpdateAPILayout(); if (!Settings::IsConfiguringGlobal()) { ConfigurationShared::SetHighlight( - ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); + ui->api_widget, ui->api->currentIndex() != 
ConfigurationShared::USE_GLOBAL_INDEX); } }); connect(ui->device, qOverload(&QComboBox::activated), this, [this](int device) { UpdateDeviceSelection(device); }); + connect(ui->backend, qOverload(&QComboBox::activated), this, + [this](int backend) { UpdateShaderBackendSelection(backend); }); connect(ui->bg_button, &QPushButton::clicked, this, [this] { const QColor new_bg_color = QColorDialog::getColor(bg_color); @@ -48,6 +54,10 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) UpdateBackgroundColorButton(new_bg_color); }); + for (const auto& device : vulkan_devices) { + ui->device->addItem(device); + } + ui->bg_label->setVisible(Settings::IsConfiguringGlobal()); ui->bg_combobox->setVisible(!Settings::IsConfiguringGlobal()); } @@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) { } } +void ConfigureGraphics::UpdateShaderBackendSelection(int backend) { + if (backend == -1) { + return; + } + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) { + shader_backend = static_cast(backend); + } +} + ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); - ui->api->setEnabled(runtime_lock); + ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock); @@ -83,7 +102,7 @@ void ConfigureGraphics::SetConfiguration() { ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); - ConfigurationShared::SetHighlight(ui->api_layout, + ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, @@ -100,11 +119,10 @@ void 
ConfigureGraphics::SetConfiguration() { ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); } - UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue())); - UpdateDeviceComboBox(); + UpdateAPILayout(); } void ConfigureGraphics::ApplyConfiguration() { @@ -128,6 +146,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.shader_backend.UsingGlobal()) { + Settings::values.shader_backend.SetValue(shader_backend); + } if (Settings::values.vulkan_device.UsingGlobal()) { Settings::values.vulkan_device.SetValue(vulkan_device); } @@ -139,15 +160,22 @@ void ConfigureGraphics::ApplyConfiguration() { } else { if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(true); } else { Settings::values.renderer_backend.SetGlobal(false); Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); - if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + switch (GetCurrentGraphicsBackend()) { + case Settings::RendererBackend::OpenGL: + Settings::values.shader_backend.SetGlobal(false); + Settings::values.vulkan_device.SetGlobal(true); + Settings::values.shader_backend.SetValue(shader_backend); + break; + case Settings::RendererBackend::Vulkan: + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(false); Settings::values.vulkan_device.SetValue(vulkan_device); - } else { - Settings::values.vulkan_device.SetGlobal(true); + break; } } @@ -188,32 +216,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor 
color) { ui->bg_button->setIcon(color_icon); } -void ConfigureGraphics::UpdateDeviceComboBox() { - ui->device->clear(); - - bool enabled = false; - +void ConfigureGraphics::UpdateAPILayout() { if (!Settings::IsConfiguringGlobal() && ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + vulkan_device = Settings::values.vulkan_device.GetValue(true); + shader_backend = Settings::values.shader_backend.GetValue(true); + ui->device_widget->setEnabled(false); + ui->backend_widget->setEnabled(false); + } else { vulkan_device = Settings::values.vulkan_device.GetValue(); + shader_backend = Settings::values.shader_backend.GetValue(); + ui->device_widget->setEnabled(true); + ui->backend_widget->setEnabled(true); } + switch (GetCurrentGraphicsBackend()) { case Settings::RendererBackend::OpenGL: - ui->device->addItem(tr("OpenGL Graphics Device")); - enabled = false; + ui->backend->setCurrentIndex(static_cast(shader_backend)); + ui->device_widget->setVisible(false); + ui->backend_widget->setVisible(true); break; case Settings::RendererBackend::Vulkan: - for (const auto& device : vulkan_devices) { - ui->device->addItem(device); - } ui->device->setCurrentIndex(vulkan_device); - enabled = !vulkan_devices.empty(); + ui->device_widget->setVisible(true); + ui->backend_widget->setVisible(false); break; } - // If in per-game config and use global is selected, don't enable. 
- enabled &= !(!Settings::IsConfiguringGlobal() && - ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX); - ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); } void ConfigureGraphics::RetrieveVulkanDevices() try { diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 6418115cf..c866b911b 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -34,8 +34,9 @@ private: void SetConfiguration(); void UpdateBackgroundColorButton(QColor color); - void UpdateDeviceComboBox(); + void UpdateAPILayout(); void UpdateDeviceSelection(int device); + void UpdateShaderBackendSelection(int backend); void RetrieveVulkanDevices(); @@ -53,4 +54,5 @@ private: std::vector vulkan_devices; u32 vulkan_device{}; + Settings::ShaderBackend shader_backend{}; }; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 5b999d84d..099ddbb7c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -23,7 +23,7 @@ - + 0 @@ -40,37 +40,107 @@ 6 - - - - API: - + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Shader Backend: + + + + + + + - - - - - OpenGL + + + + + 0 - - - - Vulkan + + 0 - + + 0 + + + 0 + + + + + Device: + + + + + + + - - - - Device: - + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + API: + + + + + + + + 0 + 0 + + + + + OpenGL + + + + + Vulkan + + + + + - - - diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a9e611125..38276feb1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -23,12 +23,10 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = 
!Core::System::GetInstance().IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); - ui->use_assembly_shaders->setEnabled(runtime_lock); ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); - ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); @@ -58,8 +56,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, - ui->use_assembly_shaders, use_assembly_shaders); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); @@ -100,7 +96,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); - ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); @@ -112,8 +107,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { } ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); - ConfigurationShared::SetColoredTristate( - ui->use_assembly_shaders, 
Settings::values.use_assembly_shaders, use_assembly_shaders); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, use_asynchronous_shaders); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 9148aacf2..7356e6916 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -35,7 +35,6 @@ private: std::unique_ptr ui; ConfigurationShared::CheckState use_vsync; - ConfigurationShared::CheckState use_assembly_shaders; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_caches_gc; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index ad0840355..772e5fed3 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -76,16 +76,6 @@ - - - - Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental. 
- - - Use assembly shaders (experimental, Nvidia OpenGL only) - - - @@ -144,22 +134,22 @@ - 2x + 2x (WILL BREAK THINGS) - 4x + 4x (WILL BREAK THINGS) - 8x + 8x (WILL BREAK THINGS) - 16x + 16x (WILL BREAK THINGS) diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 763df6dd6..640d7d111 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -458,7 +458,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); ReadSetting("Renderer", Settings::values.use_vsync); ReadSetting("Renderer", Settings::values.disable_fps_limit); - ReadSetting("Renderer", Settings::values.use_assembly_shaders); + ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index a6ca7b6cd..b7115b06a 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -248,9 +248,10 @@ max_anisotropy = # 0 (default): Off, 1: On use_vsync = -# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. -# 0: Off, 1 (default): On -use_assembly_shaders = +# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is +# not available and GLASM is selected, GLSL will be used. +# 0: GLSL, 1 (default): GLASM, 2: SPIR-V +shader_backend = # Whether to allow asynchronous shader building. 
# 0 (default): Off, 1: On -- cgit v1.2.3 From 4a82450c8139ee751f23f2d50bec6e748e7c9637 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 18 Jul 2021 20:56:50 -0300 Subject: cmake: Remove shader cache version --- src/common/CMakeLists.txt | 10 +--------- src/common/scm_rev.cpp.in | 2 -- src/core/reporter.cpp | 1 - 3 files changed, 1 insertion(+), 12 deletions(-) (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index c92266a17..3e34c6c2d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -1,8 +1,3 @@ -# Add a custom command to generate a new shader_cache_version hash when any of the following files change -# NOTE: This is an approximation of what files affect shader generation, its possible something else -# could affect the result, but much more unlikely than the following files. Keeping a list of files -# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update -set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core") if (DEFINED ENV{AZURECIREPO}) set(BUILD_REPOSITORY $ENV{AZURECIREPO}) endif() @@ -30,10 +25,7 @@ add_custom_command(OUTPUT scm_rev.cpp -DGIT_EXECUTABLE=${GIT_EXECUTABLE} -P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake DEPENDS - # WARNING! It was too much work to try and make a common location for this list, - # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well - # ... 
- # and also check that the scm_rev files haven't changed + # Check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" # technically we should regenerate if the git version changed, but its not worth the effort imo diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in index 5f126f324..cc88994c6 100644 --- a/src/common/scm_rev.cpp.in +++ b/src/common/scm_rev.cpp.in @@ -14,7 +14,6 @@ #define BUILD_ID "@BUILD_ID@" #define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@" #define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" -#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@" namespace Common { @@ -28,7 +27,6 @@ const char g_build_version[] = BUILD_VERSION; const char g_build_id[] = BUILD_ID; const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; -const char g_shader_cache_version[] = SHADER_CACHE_VERSION; } // namespace diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index cfaf50105..365b8f906 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -62,7 +62,6 @@ json GetYuzuVersionData() { {"build_date", std::string(Common::g_build_date)}, {"build_fullname", std::string(Common::g_build_fullname)}, {"build_version", std::string(Common::g_build_version)}, - {"shader_cache_version", std::string(Common::g_shader_cache_version)}, }; } -- cgit v1.2.3