diff options
Diffstat (limited to 'src/video_core')
88 files changed, 5823 insertions, 2876 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e2f85c5f1..6f3f2aa9f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -6,6 +6,7 @@ add_library(video_core STATIC dma_pusher.h debug_utils/debug_utils.cpp debug_utils/debug_utils.h + engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_upload.cpp engines/engine_upload.h @@ -35,6 +36,8 @@ add_library(video_core STATIC memory_manager.h morton.cpp morton.h + rasterizer_accelerated.cpp + rasterizer_accelerated.h rasterizer_cache.cpp rasterizer_cache.h rasterizer_interface.h @@ -105,9 +108,17 @@ add_library(video_core STATIC shader/decode/warp.cpp shader/decode/xmad.cpp shader/decode/other.cpp + shader/ast.cpp + shader/ast.h + shader/compiler_settings.cpp + shader/compiler_settings.h + shader/const_buffer_locker.cpp + shader/const_buffer_locker.h shader/control_flow.cpp shader/control_flow.h shader/decode.cpp + shader/expr.cpp + shader/expr.h shader/node_helper.cpp shader/node_helper.h shader/node.h @@ -116,6 +127,8 @@ add_library(video_core STATIC shader/track.cpp surface.cpp surface.h + texture_cache/format_lookup_table.cpp + texture_cache/format_lookup_table.h texture_cache/surface_base.cpp texture_cache/surface_base.h texture_cache/surface_params.cpp @@ -169,3 +182,9 @@ target_link_libraries(video_core PRIVATE glad) if (ENABLE_VULKAN) target_link_libraries(video_core PRIVATE sirit) endif() + +if (MSVC) + target_compile_options(video_core PRIVATE /we4267) +else() + target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion) +endif() diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2442ddfd6..4408b5001 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -12,6 +12,10 @@ #include <utility> #include <vector> +#include <boost/icl/interval_map.hpp> +#include <boost/icl/interval_set.hpp> 
+#include <boost/range/iterator_range.hpp> + #include "common/alignment.h" #include "common/common_types.h" #include "core/core.h" @@ -30,7 +34,7 @@ public: using BufferInfo = std::pair<const TBufferType*, u64>; BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, - bool is_written = false) { + bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; auto& memory_manager = system.GPU().MemoryManager(); @@ -43,9 +47,13 @@ public: // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. constexpr std::size_t max_stream_size = 0x800; - if (size < max_stream_size) { + if (use_fast_cbuf || size < max_stream_size) { if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { - return StreamBufferUpload(host_ptr, size, alignment); + if (use_fast_cbuf) { + return ConstBufferUpload(host_ptr, size); + } else { + return StreamBufferUpload(host_ptr, size, alignment); + } } } @@ -152,6 +160,10 @@ protected: virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) = 0; + virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { + return {}; + } + /// Register an object into the cache void Register(const MapInterval& new_map, bool inherit_written = false) { const CacheAddr cache_ptr = new_map->GetStart(); diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h new file mode 100644 index 000000000..ac27b6cbe --- /dev/null +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -0,0 +1,119 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <type_traits> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/textures/texture.h" + +namespace Tegra::Engines { + +enum class ShaderType : u32 { + Vertex = 0, + TesselationControl = 1, + TesselationEval = 2, + Geometry = 3, + Fragment = 4, + Compute = 5, +}; + +struct SamplerDescriptor { + union { + BitField<0, 20, Tegra::Shader::TextureType> texture_type; + BitField<20, 1, u32> is_array; + BitField<21, 1, u32> is_buffer; + BitField<22, 1, u32> is_shadow; + u32 raw{}; + }; + + bool operator==(const SamplerDescriptor& rhs) const noexcept { + return raw == rhs.raw; + } + + bool operator!=(const SamplerDescriptor& rhs) const noexcept { + return !operator==(rhs); + } + + static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { + SamplerDescriptor result; + switch (tic_texture_type) { + case Tegra::Texture::TextureType::Texture1D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture3D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::TextureCubemap: + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture1DArray: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(1); + 
result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2DArray: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture1DBuffer: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(1); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2DNoMipmap: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::TextureCubeArray: + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + default: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + } +}; +static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); + +class ConstBufferEngineInterface { +public: + virtual ~ConstBufferEngineInterface() = default; + virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; + virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; + virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const = 0; + virtual u32 GetBoundBuffer() const = 0; +}; + +} // namespace Tegra::Engines diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 7ff44f06d..85d308e26 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -28,6 +28,13 @@ void Fermi2D::CallMethod(const GPU::MethodCall& 
method_call) { } } +std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { + const u32 line_a = src_2 - src_1; + const u32 line_b = dst_2 - dst_1; + const u32 excess = std::max<s32>(0, line_a - src_line + src_1); + return {line_b - (excess * line_b) / line_a, excess}; +} + void Fermi2D::HandleSurfaceCopy() { LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", static_cast<u32>(regs.operation)); @@ -47,10 +54,27 @@ void Fermi2D::HandleSurfaceCopy() { src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width); src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height); } + u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width; + u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height; + const auto [new_dst_w, src_excess_x] = + DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width); + const auto [new_dst_h, src_excess_y] = + DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height); + dst_blit_x2 = new_dst_w + regs.blit_dst_x; + src_blit_x2 = src_blit_x2 - src_excess_x; + dst_blit_y2 = new_dst_h + regs.blit_dst_y; + src_blit_y2 = src_blit_y2 - src_excess_y; + const auto [new_src_w, dst_excess_x] = + DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width); + const auto [new_src_h, dst_excess_y] = + DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height); + src_blit_x2 = new_src_w + src_blit_x1; + dst_blit_x2 = dst_blit_x2 - dst_excess_x; + src_blit_y2 = new_src_h + src_blit_y1; + dst_blit_y2 = dst_blit_y2 - dst_excess_y; const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; - const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, - regs.blit_dst_x + regs.blit_dst_width, - regs.blit_dst_y + regs.blit_dst_height}; + const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2, + dst_blit_y2}; 
Config copy_config; copy_config.operation = regs.operation; copy_config.filter = regs.blit_control.filter; diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0901cf2fa..dba342c70 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -99,19 +99,19 @@ public: union { struct { - INSERT_PADDING_WORDS(0x80); + INSERT_UNION_PADDING_WORDS(0x80); Surface dst; - INSERT_PADDING_WORDS(2); + INSERT_UNION_PADDING_WORDS(2); Surface src; - INSERT_PADDING_WORDS(0x15); + INSERT_UNION_PADDING_WORDS(0x15); Operation operation; - INSERT_PADDING_WORDS(0x177); + INSERT_UNION_PADDING_WORDS(0x177); union { u32 raw; @@ -119,7 +119,7 @@ public: BitField<4, 1, Filter> filter; } blit_control; - INSERT_PADDING_WORDS(0x8); + INSERT_UNION_PADDING_WORDS(0x8); u32 blit_dst_x; u32 blit_dst_y; @@ -130,7 +130,7 @@ public: u64 blit_src_x; u64 blit_src_y; - INSERT_PADDING_WORDS(0x21); + INSERT_UNION_PADDING_WORDS(0x21); }; std::array<u32, NUM_REGS> reg_array; }; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 63d449135..3a39aeabe 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -50,7 +50,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { } } -Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { +Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); ASSERT(cbuf_mask[regs.tex_cb_index]); @@ -61,22 +61,38 @@ Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) co ASSERT(address < texinfo.Address() + texinfo.size); const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; - return GetTextureInfo(tex_handle, offset); + return GetTextureInfo(tex_handle); } -Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const 
Texture::TextureHandle tex_handle, - std::size_t offset) const { - return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id), - GetTSCEntry(tex_handle.tsc_id)}; +Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { + return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; } -u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { +u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; u32 result; std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); return result; } +SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage == ShaderType::Compute); + const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 90cf650d2..5259d92bd 100644 --- 
a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,6 +10,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" @@ -37,7 +38,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final { +class KeplerCompute final : public ConstBufferEngineInterface { public: explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -50,7 +51,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0x60); + INSERT_UNION_PADDING_WORDS(0x60); Upload::Registers upload; @@ -62,7 +63,7 @@ public: u32 data_upload; - INSERT_PADDING_WORDS(0x3F); + INSERT_UNION_PADDING_WORDS(0x3F); struct { u32 address; @@ -71,11 +72,11 @@ public: } } launch_desc_loc; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); u32 launch; - INSERT_PADDING_WORDS(0x4A7); + INSERT_UNION_PADDING_WORDS(0x4A7); struct { u32 address_high; @@ -87,7 +88,7 @@ public: } } tsc; - INSERT_PADDING_WORDS(0x3); + INSERT_UNION_PADDING_WORDS(0x3); struct { u32 address_high; @@ -99,7 +100,7 @@ public: } } tic; - INSERT_PADDING_WORDS(0x22); + INSERT_UNION_PADDING_WORDS(0x22); struct { u32 address_high; @@ -110,11 +111,11 @@ public: } } code_loc; - INSERT_PADDING_WORDS(0x3FE); + INSERT_UNION_PADDING_WORDS(0x3FE); u32 tex_cb_index; - INSERT_PADDING_WORDS(0x374); + INSERT_UNION_PADDING_WORDS(0x374); }; std::array<u32, NUM_REGS> reg_array; }; @@ -178,7 +179,7 @@ public: }; INSERT_PADDING_WORDS(0x11); - } launch_description; + } launch_description{}; struct { u32 write_offset = 0; @@ -195,13 +196,21 @@ public: /// Write the value to the register identified by method. 
void CallMethod(const GPU::MethodCall& method_call); - Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const; + Texture::FullTextureInfo GetTexture(std::size_t offset) const; - /// Given a Texture Handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const; + /// Given a texture handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } private: Core::System& system; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index e0e25c321..396fb6e86 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -45,7 +45,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0x60); + INSERT_UNION_PADDING_WORDS(0x60); Upload::Registers upload; @@ -57,7 +57,7 @@ public: u32 data; - INSERT_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x11); }; std::array<u32, NUM_REGS> reg_array; }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b318aedb8..a44c09003 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -98,10 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() { mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; } -#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) +#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name)) void 
Maxwell3D::InitDirtySettings() { - const auto set_block = [this](const u32 start, const u32 range, const u8 position) { + const auto set_block = [this](std::size_t start, std::size_t range, u8 position) { const auto start_itr = dirty_pointers.begin() + start; const auto end_itr = start_itr + range; std::fill(start_itr, end_itr, position); @@ -112,10 +112,10 @@ void Maxwell3D::InitDirtySettings() { constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; - u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); + u8 rt_dirty_reg = DIRTY_REGS_POS(render_target); for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { set_block(rt_reg, registers_per_rt, rt_dirty_reg); - rt_dirty_reg++; + ++rt_dirty_reg; } constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; @@ -129,35 +129,35 @@ void Maxwell3D::InitDirtySettings() { constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; - u32 va_reg = DIRTY_REGS_POS(vertex_array); - u32 vi_reg = DIRTY_REGS_POS(vertex_instance); + u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array); + u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; vertex_reg += vertex_array_size) { - set_block(vertex_reg, 3, va_reg); + set_block(vertex_reg, 3, va_dirty_reg); // The divisor concerns vertex array instances - dirty_pointers[vertex_reg + 3] = vi_reg; - va_reg++; - vi_reg++; + dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg; + ++va_dirty_reg; + ++vi_dirty_reg; } constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); constexpr u32 
vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; - va_reg = DIRTY_REGS_POS(vertex_array); + va_dirty_reg = DIRTY_REGS_POS(vertex_array); for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; vertex_reg += vertex_limit_size) { - set_block(vertex_reg, vertex_limit_size, va_reg); - va_reg++; + set_block(vertex_reg, vertex_limit_size, va_dirty_reg); + va_dirty_reg++; } constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); constexpr u32 vertex_instance_size = sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); constexpr u32 vertex_instance_end = vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; - vi_reg = DIRTY_REGS_POS(vertex_instance); + vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; vertex_reg += vertex_instance_size) { - set_block(vertex_reg, vertex_instance_size, vi_reg); - vi_reg++; + set_block(vertex_reg, vertex_instance_size, vi_dirty_reg); + vi_dirty_reg++; } set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), DIRTY_REGS_POS(vertex_attrib_format)); @@ -171,7 +171,7 @@ void Maxwell3D::InitDirtySettings() { // State // Viewport - constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); + constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport); constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); set_block(viewport_start, viewport_size, viewport_dirty_reg); @@ -198,7 +198,7 @@ void Maxwell3D::InitDirtySettings() { set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); // Depth Test - constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); + constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); 
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; @@ -223,12 +223,12 @@ void Maxwell3D::InitDirtySettings() { dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; // Color Mask - constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); + constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), color_mask_dirty_reg); // Blend State - constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); + constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), blend_state_dirty_reg); dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; @@ -237,18 +237,23 @@ void Maxwell3D::InitDirtySettings() { blend_state_dirty_reg); // Scissor State - constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); + constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), scissor_test_dirty_reg); // Polygon Offset - constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); + constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = 
polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; + + // Depth bounds + constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); + dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; + dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; } void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { @@ -256,7 +261,8 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 executing_macro = 0; // Lookup the macro offset - const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); + const u32 entry = + ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); // Execute the current macro. macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); @@ -473,7 +479,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { } void Maxwell3D::FlushMMEInlineDraw() { - LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); @@ -736,14 +742,6 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); - [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()}; - [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()}; - [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()}; - [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()}; - - // TODO(Subv): Different data types for separate components are not supported - 
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); - return tic_entry; } @@ -755,61 +753,8 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { return tsc_entry; } -std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { - std::vector<Texture::FullTextureInfo> textures; - - auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)]; - auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; - - // Offset into the texture constbuffer where the texture info begins. - static constexpr std::size_t TextureInfoOffset = 0x20; - - for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; - current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { - - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)}; - - Texture::FullTextureInfo tex_info{}; - // TODO(Subv): Use the shader to determine which textures are actually accessed. - tex_info.index = - static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) / - sizeof(Texture::TextureHandle); - - // Load the TIC data. - auto tic_entry = GetTICEntry(tex_handle.tic_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry)); - - // Load the TSC data - auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. 
- std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry)); - - textures.push_back(tex_info); - } - - return textures; -} - -Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const { - Texture::FullTextureInfo tex_info{}; - tex_info.index = static_cast<u32>(offset); - - // Load the TIC data. - auto tic_entry = GetTICEntry(tex_handle.tic_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry)); - - // Load the TSC data - auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry)); - - return tex_info; +Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { + return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; } Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, @@ -825,7 +770,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - return GetTextureInfo(tex_handle, offset); + return GetTextureInfo(tex_handle); } u32 Maxwell3D::GetRegisterValue(u32 method) const { @@ -841,7 +786,8 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } -u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { +u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; u32 result; @@ -849,4 +795,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6 return result; } +SamplerDescriptor 
Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage != ShaderType::Compute); + const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& tex_info_buffer = shader.const_buffers[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.address + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 4c97759ed..1aa7c274f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -15,6 +15,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" @@ -44,7 +45,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final { +class Maxwell3D final : public ConstBufferEngineInterface { public: explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -495,7 +496,7 @@ public: Equation equation_a; Factor factor_source_a; Factor factor_dest_a; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); }; struct RenderTargetConfig { @@ -516,7 +517,7 @@ 
public: }; u32 layer_stride; u32 base_layer; - INSERT_PADDING_WORDS(7); + INSERT_UNION_PADDING_WORDS(7); GPUVAddr Address() const { return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | @@ -541,7 +542,7 @@ public: f32 translate_x; f32 translate_y; f32 translate_z; - INSERT_PADDING_WORDS(2); + INSERT_UNION_PADDING_WORDS(2); Common::Rectangle<s32> GetRect() const { return { @@ -605,7 +606,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0x45); + INSERT_UNION_PADDING_WORDS(0x45); struct { u32 upload_address; @@ -614,7 +615,7 @@ public: u32 bind; } macros; - INSERT_PADDING_WORDS(0x17); + INSERT_UNION_PADDING_WORDS(0x17); Upload::Registers upload; struct { @@ -625,7 +626,7 @@ public: u32 data_upload; - INSERT_PADDING_WORDS(0x44); + INSERT_UNION_PADDING_WORDS(0x44); struct { union { @@ -635,11 +636,11 @@ public: }; } sync_info; - INSERT_PADDING_WORDS(0x11E); + INSERT_UNION_PADDING_WORDS(0x11E); u32 tfb_enabled; - INSERT_PADDING_WORDS(0x2E); + INSERT_UNION_PADDING_WORDS(0x2E); std::array<RenderTargetConfig, NumRenderTargets> rt; @@ -647,47 +648,49 @@ public: std::array<ViewPort, NumViewports> viewports; - INSERT_PADDING_WORDS(0x1D); + INSERT_UNION_PADDING_WORDS(0x1D); struct { u32 first; u32 count; } vertex_buffer; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); float clear_color[4]; float clear_depth; - INSERT_PADDING_WORDS(0x3); + INSERT_UNION_PADDING_WORDS(0x3); s32 clear_stencil; - INSERT_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x7); u32 polygon_offset_point_enable; u32 polygon_offset_line_enable; u32 polygon_offset_fill_enable; - INSERT_PADDING_WORDS(0xD); + INSERT_UNION_PADDING_WORDS(0xD); std::array<ScissorTest, NumViewports> scissor_test; - INSERT_PADDING_WORDS(0x15); + INSERT_UNION_PADDING_WORDS(0x15); s32 stencil_back_func_ref; u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0xC); u32 color_mask_common; - INSERT_PADDING_WORDS(0x6); + 
INSERT_UNION_PADDING_WORDS(0x6); u32 rt_separate_frag_data; - INSERT_PADDING_WORDS(0xC); + f32 depth_bounds[2]; + + INSERT_UNION_PADDING_WORDS(0xA); struct { u32 address_high; @@ -707,7 +710,7 @@ public: } } zeta; - INSERT_PADDING_WORDS(0x41); + INSERT_UNION_PADDING_WORDS(0x41); union { BitField<0, 4, u32> stencil; @@ -716,11 +719,11 @@ public: BitField<12, 4, u32> viewport; } clear_flags; - INSERT_PADDING_WORDS(0x19); + INSERT_UNION_PADDING_WORDS(0x19); std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; - INSERT_PADDING_WORDS(0xF); + INSERT_UNION_PADDING_WORDS(0xF); struct { union { @@ -743,16 +746,16 @@ public: } } rt_control; - INSERT_PADDING_WORDS(0x2); + INSERT_UNION_PADDING_WORDS(0x2); u32 zeta_width; u32 zeta_height; - INSERT_PADDING_WORDS(0x27); + INSERT_UNION_PADDING_WORDS(0x27); u32 depth_test_enable; - INSERT_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x5); u32 independent_blend_enable; @@ -760,7 +763,7 @@ public: u32 alpha_test_enabled; - INSERT_PADDING_WORDS(0x6); + INSERT_UNION_PADDING_WORDS(0x6); u32 d3d_cull_mode; @@ -774,7 +777,7 @@ public: float b; float a; } blend_color; - INSERT_PADDING_WORDS(0x4); + INSERT_UNION_PADDING_WORDS(0x4); struct { u32 separate_alpha; @@ -783,7 +786,7 @@ public: Blend::Factor factor_dest_rgb; Blend::Equation equation_a; Blend::Factor factor_source_a; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); Blend::Factor factor_dest_a; u32 enable_common; @@ -799,7 +802,7 @@ public: u32 stencil_front_func_mask; u32 stencil_front_mask; - INSERT_PADDING_WORDS(0x2); + INSERT_UNION_PADDING_WORDS(0x2); u32 frag_color_clamp; @@ -808,12 +811,12 @@ public: BitField<4, 1, u32> triangle_rast_flip; } screen_y_control; - INSERT_PADDING_WORDS(0x21); + INSERT_UNION_PADDING_WORDS(0x21); u32 vb_element_base; u32 vb_base_instance; - INSERT_PADDING_WORDS(0x35); + INSERT_UNION_PADDING_WORDS(0x35); union { BitField<0, 1, u32> c0; @@ -826,11 +829,11 @@ public: BitField<7, 1, u32> c7; } clip_distance_enabled; - 
INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); float point_size; - INSERT_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x7); u32 zeta_enable; @@ -839,7 +842,7 @@ public: BitField<4, 1, u32> alpha_to_one; } multisample_control; - INSERT_PADDING_WORDS(0x4); + INSERT_UNION_PADDING_WORDS(0x4); struct { u32 address_high; @@ -863,11 +866,11 @@ public: } } tsc; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); float polygon_offset_factor; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); struct { u32 tic_address_high; @@ -880,7 +883,7 @@ public: } } tic; - INSERT_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x5); u32 stencil_two_side_enable; StencilOp stencil_back_op_fail; @@ -888,13 +891,13 @@ public: StencilOp stencil_back_op_zpass; ComparisonOp stencil_back_func_func; - INSERT_PADDING_WORDS(0x4); + INSERT_UNION_PADDING_WORDS(0x4); u32 framebuffer_srgb; float polygon_offset_units; - INSERT_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x11); union { BitField<2, 1, u32> coord_origin; @@ -910,7 +913,7 @@ public: (static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low); } } code_address; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); struct { u32 vertex_end_gl; @@ -922,14 +925,14 @@ public: }; } draw; - INSERT_PADDING_WORDS(0xA); + INSERT_UNION_PADDING_WORDS(0xA); struct { u32 enabled; u32 index; } primitive_restart; - INSERT_PADDING_WORDS(0x5F); + INSERT_UNION_PADDING_WORDS(0x5F); struct { u32 start_addr_high; @@ -970,9 +973,9 @@ public: } } index_array; - INSERT_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x7); - INSERT_PADDING_WORDS(0x1F); + INSERT_UNION_PADDING_WORDS(0x1F); float polygon_offset_clamp; @@ -986,17 +989,17 @@ public: } } instanced_arrays; - INSERT_PADDING_WORDS(0x6); + INSERT_UNION_PADDING_WORDS(0x6); Cull cull; u32 pixel_center_integer; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); u32 viewport_transform_enabled; - INSERT_PADDING_WORDS(0x3); + 
INSERT_UNION_PADDING_WORDS(0x3); union { BitField<0, 1, u32> depth_range_0_1; @@ -1004,13 +1007,13 @@ public: BitField<4, 1, u32> depth_clamp_far; } view_volume_clip_control; - INSERT_PADDING_WORDS(0x21); + INSERT_UNION_PADDING_WORDS(0x21); struct { u32 enable; LogicOperation operation; } logic_op; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); union { u32 raw; @@ -1023,9 +1026,9 @@ public: BitField<6, 4, u32> RT; BitField<10, 11, u32> layer; } clear_buffers; - INSERT_PADDING_WORDS(0xB); + INSERT_UNION_PADDING_WORDS(0xB); std::array<ColorMask, NumRenderTargets> color_mask; - INSERT_PADDING_WORDS(0x38); + INSERT_UNION_PADDING_WORDS(0x38); struct { u32 query_address_high; @@ -1047,7 +1050,7 @@ public: } } query; - INSERT_PADDING_WORDS(0x3C); + INSERT_UNION_PADDING_WORDS(0x3C); struct { union { @@ -1087,10 +1090,10 @@ public: BitField<4, 4, ShaderProgram> program; }; u32 offset; - INSERT_PADDING_WORDS(14); + INSERT_UNION_PADDING_WORDS(14); } shader_config[MaxShaderProgram]; - INSERT_PADDING_WORDS(0x60); + INSERT_UNION_PADDING_WORDS(0x60); u32 firmware[0x20]; @@ -1107,7 +1110,7 @@ public: } } const_buffer; - INSERT_PADDING_WORDS(0x10); + INSERT_UNION_PADDING_WORDS(0x10); struct { union { @@ -1115,14 +1118,14 @@ public: BitField<0, 1, u32> valid; BitField<4, 5, u32> index; }; - INSERT_PADDING_WORDS(7); + INSERT_UNION_PADDING_WORDS(7); } cb_bind[MaxShaderStage]; - INSERT_PADDING_WORDS(0x56); + INSERT_UNION_PADDING_WORDS(0x56); u32 tex_cb_index; - INSERT_PADDING_WORDS(0x395); + INSERT_UNION_PADDING_WORDS(0x395); struct { /// Compressed address of a buffer that holds information about bound SSBOs. 
@@ -1134,14 +1137,14 @@ public: } } ssbo_info; - INSERT_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x11); struct { u32 address[MaxShaderStage]; u32 size[MaxShaderStage]; } tex_info_buffers; - INSERT_PADDING_WORDS(0xCC); + INSERT_UNION_PADDING_WORDS(0xCC); }; std::array<u32, NUM_REGS> reg_array; }; @@ -1163,6 +1166,8 @@ public: struct DirtyRegs { static constexpr std::size_t NUM_REGS = 256; + static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max()); + union { struct { bool null_dirty; @@ -1201,6 +1206,7 @@ public: bool transform_feedback; bool color_mask; bool polygon_offset; + bool depth_bounds_values; // Complementary bool viewport_transform; @@ -1244,17 +1250,22 @@ public: void FlushMMEInlineDraw(); - /// Given a Texture Handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const; - - /// Returns a list of enabled textures for the specified shader stage. - std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; + /// Given a texture handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; - u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. 
@@ -1400,6 +1411,7 @@ ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); +ASSERT_REG_POSITION(depth_bounds, 0x3EC); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 93808a9bb..4f40d1d1f 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -94,7 +94,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0xC0); + INSERT_UNION_PADDING_WORDS(0xC0); struct { union { @@ -112,7 +112,7 @@ public: }; } exec; - INSERT_PADDING_WORDS(0x3F); + INSERT_UNION_PADDING_WORDS(0x3F); struct { u32 address_high; @@ -139,7 +139,7 @@ public: u32 x_count; u32 y_count; - INSERT_PADDING_WORDS(0xB8); + INSERT_UNION_PADDING_WORDS(0xB8); u32 const0; u32 const1; @@ -162,11 +162,11 @@ public: Parameters dst_params; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); Parameters src_params; - INSERT_PADDING_WORDS(0x13); + INSERT_UNION_PADDING_WORDS(0x13); }; std::array<u32, NUM_REGS> reg_array; }; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7a6355ce2..9fafed4a2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 { }; union Instruction { - Instruction& operator=(const Instruction& instr) { + constexpr Instruction& operator=(const Instruction& instr) { value = instr.value; return *this; } @@ -616,6 +616,14 @@ union Instruction { } shfl; union { + BitField<44, 1, u64> ftz; + BitField<39, 2, u64> tab5cb8_2; + BitField<38, 1, u64> ndv; + BitField<47, 1, u64> cc; + BitField<28, 8, u64> swizzle; + } fswzadd; + + union { BitField<8, 8, Register> gpr; BitField<20, 24, s64> offset; } gmem; 
@@ -1238,6 +1246,32 @@ union Instruction { } tld4; union { + BitField<35, 1, u64> ndv_flag; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<33, 2, u64> info; + BitField<37, 2, u64> component; + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::NDV: + return ndv_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::AOFFI: + return info == 1; + case TextureMiscMode::PTP: + return info == 2; + default: + break; + } + return false; + } + } tld4_b; + + union { BitField<49, 1, u64> nodep_flag; BitField<50, 1, u64> dc_flag; BitField<51, 1, u64> aoffi_flag; @@ -1452,7 +1486,8 @@ union Instruction { u32 value = static_cast<u32>(target); // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; + return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) + + 1; } } bra; @@ -1466,7 +1501,8 @@ union Instruction { u32 value = static_cast<u32>(target); // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. 
- return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; + return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) + + 1; } } brx; @@ -1564,6 +1600,7 @@ public: DEPBAR, VOTE, SHFL, + FSWZADD, BFE_C, BFE_R, BFE_IMM, @@ -1590,7 +1627,8 @@ public: TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLD, // Texture Load TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Load 4 + TLD4, // Texture Gather 4 + TLD4_B, // Texture Gather 4 Bindless TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations TMML_B, // Texture Mip Map Level TMML, // Texture Mip Map Level @@ -1760,22 +1798,22 @@ public: class Matcher { public: - Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type) + constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type) : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} - const char* GetName() const { + constexpr const char* GetName() const { return name; } - u16 GetMask() const { + constexpr u16 GetMask() const { return mask; } - Id GetId() const { + constexpr Id GetId() const { return id; } - Type GetType() const { + constexpr Type GetType() const { return type; } @@ -1784,7 +1822,7 @@ public: * @param instruction The instruction to test * @returns true if the given instruction matches. */ - bool Matches(u16 instruction) const { + constexpr bool Matches(u16 instruction) const { return (instruction & mask) == expected; } @@ -1818,32 +1856,32 @@ private: * A '0' in a bitstring indicates that a zero must be present at that bit position. * A '1' in a bitstring indicates that a one must be present at that bit position. 
*/ - static auto GetMaskAndExpect(const char* const bitstring) { + static constexpr auto GetMaskAndExpect(const char* const bitstring) { u16 mask = 0, expect = 0; for (std::size_t i = 0; i < opcode_bitsize; i++) { const std::size_t bit_position = opcode_bitsize - i - 1; switch (bitstring[i]) { case '0': - mask |= 1 << bit_position; + mask |= static_cast<u16>(1U << bit_position); break; case '1': - expect |= 1 << bit_position; - mask |= 1 << bit_position; + expect |= static_cast<u16>(1U << bit_position); + mask |= static_cast<u16>(1U << bit_position); break; default: // Ignore break; } } - return std::make_tuple(mask, expect); + return std::make_pair(mask, expect); } public: /// Creates a matcher that can match and parse instructions based on bitstring. - static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type, - const char* const name) { - const auto mask_expect = GetMaskAndExpect(bitstring); - return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type); + static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type, + const char* const name) { + const auto [mask, expected] = GetMaskAndExpect(bitstring); + return Matcher(name, mask, expected, op, type); } }; @@ -1861,6 +1899,7 @@ private: INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), + INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), @@ -1881,6 +1920,7 @@ private: INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), + INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), INST("1101111100------", Id::TLD4S, 
Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index e86a7f04a..bc80661d8 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -38,37 +38,37 @@ struct Header { BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; - } common0; + } common0{}; union { BitField<0, 24, u32> shader_local_memory_low_size; BitField<24, 8, u32> per_patch_attribute_count; - } common1; + } common1{}; union { BitField<0, 24, u32> shader_local_memory_high_size; BitField<24, 8, u32> threads_per_input_primitive; - } common2; + } common2{}; union { BitField<0, 24, u32> shader_local_memory_crs_size; BitField<24, 4, OutputTopology> output_topology; BitField<28, 4, u32> reserved; - } common3; + } common3{}; union { BitField<0, 12, u32> max_output_vertices; BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. BitField<24, 4, u32> reserved; BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. 
- } common4; + } common4{}; union { struct { - INSERT_PADDING_BYTES(3); // ImapSystemValuesA - INSERT_PADDING_BYTES(1); // ImapSystemValuesB - INSERT_PADDING_BYTES(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES(2); // ImapColor + INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB + INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32] + INSERT_UNION_PADDING_BYTES(2); // ImapColor union { BitField<0, 8, u16> clip_distances; BitField<8, 1, u16> point_sprite_s; @@ -79,20 +79,20 @@ struct Header { BitField<14, 1, u16> instance_id; BitField<15, 1, u16> vertex_id; }; - INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES(1); // ImapReserved - INSERT_PADDING_BYTES(3); // OmapSystemValuesA - INSERT_PADDING_BYTES(1); // OmapSystemValuesB - INSERT_PADDING_BYTES(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES(2); // OmapColor - INSERT_PADDING_BYTES(2); // OmapSystemValuesC - INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES(1); // OmapReserved + INSERT_UNION_PADDING_BYTES(5); // ImapFixedFncTexture[10] + INSERT_UNION_PADDING_BYTES(1); // ImapReserved + INSERT_UNION_PADDING_BYTES(3); // OmapSystemValuesA + INSERT_UNION_PADDING_BYTES(1); // OmapSystemValuesB + INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32] + INSERT_UNION_PADDING_BYTES(2); // OmapColor + INSERT_UNION_PADDING_BYTES(2); // OmapSystemValuesC + INSERT_UNION_PADDING_BYTES(5); // OmapFixedFncTexture[10] + INSERT_UNION_PADDING_BYTES(1); // OmapReserved } vtg; struct { - INSERT_PADDING_BYTES(3); // ImapSystemValuesA - INSERT_PADDING_BYTES(1); // ImapSystemValuesB + INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB union { BitField<0, 2, AttributeUse> x; BitField<2, 2, AttributeUse> y; @@ -100,10 +100,10 @@ struct Header { BitField<6, 2, AttributeUse> z; u8 raw; } imap_generic_vector[32]; - INSERT_PADDING_BYTES(2); // ImapColor - 
INSERT_PADDING_BYTES(2); // ImapSystemValuesC - INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES(2); // ImapReserved + INSERT_UNION_PADDING_BYTES(2); // ImapColor + INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC + INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10] + INSERT_UNION_PADDING_BYTES(2); // ImapReserved struct { u32 target; union { @@ -139,6 +139,8 @@ struct Header { return result; } } ps; + + std::array<u32, 0xF> raw{}; }; u64 GetLocalMemorySize() const { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..095660115 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" @@ -17,6 +18,8 @@ namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); + GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { + // Synced GPU, is always in sync + if (!is_async) { + return; + } + MICROPROFILE_SCOPE(GPU_wait); + while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { + } +} + void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; @@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); + block.timestamp = system.CoreTiming().GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), 
&block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..ecc338ae9 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,12 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + // Waits for the GPU to finish working + virtual void WaitIdle() const = 0; + + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. + void WaitFence(u32 syncpoint_id, u32 value) const; + void IncrementSyncPoint(u32 syncpoint_id); u32 GetSyncpointValue(u32 syncpoint_id) const; @@ -201,7 +207,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0x4); + INSERT_UNION_PADDING_WORDS(0x4); struct { u32 address_high; u32 address_low; @@ -214,12 +220,12 @@ public: u32 semaphore_sequence; u32 semaphore_trigger; - INSERT_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0xC); // The puser and the puller share the reference counter, the pusher only has read // access u32 reference_count; - INSERT_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x5); u32 semaphore_acquire; u32 semaphore_release; @@ -228,7 +234,7 @@ public: BitField<4, 4, u32> operation; BitField<8, 8, u32> id; } fence_action; - INSERT_PADDING_WORDS(0xE2); + INSERT_UNION_PADDING_WORDS(0xE2); // Puller state u32 acquire_mode; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } +void GPUAsynch::WaitIdle() const { + gpu_thread.WaitIdle(); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void 
InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override; protected: void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override {} protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; - system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); + PushCommand(SubmitListCommand(std::move(entries))); } void ThreadManager::SwapBuffers(const 
Tegra::FramebufferConfig* framebuffer) { @@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void ThreadManager::WaitIdle() const { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { + } +} + u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); return fence; } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { - while (signaled_fence.load() < fence) - ; -} - } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher; namespace Core { class System; -namespace Timing { -struct EventType; -} // namespace Timing } // namespace Core namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - void WaitForSynchronization(u64 fence); - using CommandQueue = Common::SPSCQueue<CommandDataContainer>; CommandQueue queue; u64 last_fence{}; @@ -121,6 +116,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + // Wait until the gpu thread is idle. 
+ void WaitIdle() const; + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); @@ -128,7 +126,6 @@ private: private: SynchState state; Core::System& system; - Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index dbaeac6db..42031d80a 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -11,6 +11,77 @@ MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); namespace Tegra { +namespace { +enum class Operation : u32 { + ALU = 0, + AddImmediate = 1, + ExtractInsert = 2, + ExtractShiftLeftImmediate = 3, + ExtractShiftLeftRegister = 4, + Read = 5, + Unused = 6, // This operation doesn't seem to be a valid encoding. + Branch = 7, +}; +} // Anonymous namespace + +enum class MacroInterpreter::ALUOperation : u32 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + // Operations 4-7 don't seem to be valid encodings. + Xor = 8, + Or = 9, + And = 10, + AndNot = 11, + Nand = 12 +}; + +enum class MacroInterpreter::ResultOperation : u32 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodFetchAndSend = 6, + MoveAndSetMethodSend = 7 +}; + +enum class MacroInterpreter::BranchCondition : u32 { + Zero = 0, + NotZero = 1, +}; + +union MacroInterpreter::Opcode { + u32 raw; + BitField<0, 3, Operation> operation; + BitField<4, 3, ResultOperation> result_operation; + BitField<4, 1, BranchCondition> branch_condition; + // If set on a branch, then the branch doesn't have a delay slot. 
+ BitField<5, 1, u32> branch_annul; + BitField<7, 1, u32> is_exit; + BitField<8, 3, u32> dst; + BitField<11, 3, u32> src_a; + BitField<14, 3, u32> src_b; + // The signed immediate overlaps the second source operand and the alu operation. + BitField<14, 18, s32> immediate; + + BitField<17, 5, ALUOperation> alu_operation; + + // Bitfield instructions data + BitField<17, 5, u32> bf_src_bit; + BitField<22, 5, u32> bf_size; + BitField<27, 5, u32> bf_dst_bit; + // Mask with the low bf_size bits set. + u32 GetBitfieldMask() const { + return (1U << bf_size) - 1; // bf_size is 5 bits wide and can be 31; a signed 1 would overflow int there. + } + // The signed immediate scaled by sizeof(u32). + s32 GetBranchTarget() const { + return immediate.Value() * static_cast<s32>(sizeof(u32)); // Keep the arithmetic in s32 instead of promoting to std::size_t. + } +}; MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 76b6a895b..631146d89 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h @@ -6,7 +6,6 @@ #include <array> #include <optional> -#include <vector> #include "common/bit_field.h" #include "common/common_types.h" @@ -28,75 +27,11 @@ public: void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); private: - enum class Operation : u32 { - ALU = 0, - AddImmediate = 1, - ExtractInsert = 2, - ExtractShiftLeftImmediate = 3, - ExtractShiftLeftRegister = 4, - Read = 5, - Unused = 6, // This operation doesn't seem to be a valid encoding. - Branch = 7, - }; - - enum class ALUOperation : u32 { - Add = 0, - AddWithCarry = 1, - Subtract = 2, - SubtractWithBorrow = 3, - // Operations 4-7 don't seem to be valid encodings. 
- Xor = 8, - Or = 9, - And = 10, - AndNot = 11, - Nand = 12 - }; - - enum class ResultOperation : u32 { - IgnoreAndFetch = 0, - Move = 1, - MoveAndSetMethod = 2, - FetchAndSend = 3, - MoveAndSend = 4, - FetchAndSetMethod = 5, - MoveAndSetMethodFetchAndSend = 6, - MoveAndSetMethodSend = 7 - }; + enum class ALUOperation : u32; + enum class BranchCondition : u32; + enum class ResultOperation : u32; - enum class BranchCondition : u32 { - Zero = 0, - NotZero = 1, - }; - - union Opcode { - u32 raw; - BitField<0, 3, Operation> operation; - BitField<4, 3, ResultOperation> result_operation; - BitField<4, 1, BranchCondition> branch_condition; - BitField<5, 1, u32> - branch_annul; // If set on a branch, then the branch doesn't have a delay slot. - BitField<7, 1, u32> is_exit; - BitField<8, 3, u32> dst; - BitField<11, 3, u32> src_a; - BitField<14, 3, u32> src_b; - // The signed immediate overlaps the second source operand and the alu operation. - BitField<14, 18, s32> immediate; - - BitField<17, 5, ALUOperation> alu_operation; - - // Bitfield instructions data - BitField<17, 5, u32> bf_src_bit; - BitField<22, 5, u32> bf_size; - BitField<27, 5, u32> bf_dst_bit; - - u32 GetBitfieldMask() const { - return (1 << bf_size) - 1; - } - - s32 GetBranchTarget() const { - return static_cast<s32>(immediate * sizeof(u32)); - } - }; + union Opcode; union MethodAddress { u32 raw; @@ -149,9 +84,10 @@ private: Engines::Maxwell3D& maxwell3d; - u32 pc; ///< Current program counter - std::optional<u32> - delayed_pc; ///< Program counter to execute at after the delay slot is executed. + /// Current program counter + u32 pc; + /// Program counter to execute at after the delay slot is executed. 
+ std::optional<u32> delayed_pc; static constexpr std::size_t NumMacroRegisters = 8; diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index ab71870ab..2f2fe6859 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -93,6 +93,7 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::DXT23_SRGB>, MortonCopy<true, PixelFormat::DXT45_SRGB>, MortonCopy<true, PixelFormat::BC7U_SRGB>, + MortonCopy<true, PixelFormat::R4G4B4A4U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, @@ -101,6 +102,17 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_10X8>, MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, + MortonCopy<true, PixelFormat::E5B9G9R9F>, MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z24S8>, @@ -162,6 +174,7 @@ static constexpr ConversionArray linear_to_morton_fns = { MortonCopy<false, PixelFormat::DXT23_SRGB>, MortonCopy<false, PixelFormat::DXT45_SRGB>, MortonCopy<false, PixelFormat::BC7U_SRGB>, + MortonCopy<false, PixelFormat::R4G4B4A4U>, nullptr, nullptr, nullptr, @@ -170,6 +183,17 @@ static constexpr ConversionArray linear_to_morton_fns = { nullptr, nullptr, nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + 
nullptr, + nullptr, + MortonCopy<false, PixelFormat::E5B9G9R9F>, MortonCopy<false, PixelFormat::Z32F>, MortonCopy<false, PixelFormat::Z16>, MortonCopy<false, PixelFormat::Z24S8>, diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp new file mode 100644 index 000000000..b230dcc18 --- /dev/null +++ b/src/video_core/rasterizer_accelerated.cpp @@ -0,0 +1,63 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <mutex> + +#include <boost/icl/interval_map.hpp> + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/rasterizer_accelerated.h" + +namespace VideoCore { + +namespace { + +template <typename Map, typename Interval> +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} + +} // Anonymous namespace + +RasterizerAccelerated::RasterizerAccelerated() = default; + +RasterizerAccelerated::~RasterizerAccelerated() = default; + +void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + std::lock_guard lock{pages_mutex}; + const u64 page_start{addr >> Memory::PAGE_BITS}; + const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) { + cached_pages.add({pages_interval, delta}); + } + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; 
+ const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) { + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + } else if (delta < 0 && count == -delta) { + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + } else { + ASSERT(count >= 0); + } + } + + if (delta < 0) { + cached_pages.add({pages_interval, delta}); + } +} + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h new file mode 100644 index 000000000..8f7e3547e --- /dev/null +++ b/src/video_core/rasterizer_accelerated.h @@ -0,0 +1,31 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <mutex> + +#include <boost/icl/interval_map.hpp> + +#include "common/common_types.h" +#include "video_core/rasterizer_interface.h" + +namespace VideoCore { + +/// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface. 
+class RasterizerAccelerated : public RasterizerInterface { +public: + explicit RasterizerAccelerated(); + ~RasterizerAccelerated() override; + + void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; + +private: + using CachedPageMap = boost::icl::interval_map<u64, int>; + CachedPageMap cached_pages; + + std::mutex pages_mutex; +}; + +} // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index f8a807c84..0375fca17 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -8,13 +8,17 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) @@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz CachedBufferBlock::~CachedBufferBlock() = default; OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - std::size_t stream_size) - : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{ - rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} + const Device& device, std::size_t stream_size) + : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { + if (!device.HasFastBufferSubData()) { + return; + } + + static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); + 
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); + for (const GLuint cbuf : cbufs) { + glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); + } +} -OGLBufferCache::~OGLBufferCache() = default; +OGLBufferCache::~OGLBufferCache() { + glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); +} Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { return std::make_shared<CachedBufferBlock>(cache_addr, size); @@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t static_cast<GLsizeiptr>(size)); } +OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, + std::size_t size) { + DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); + const GLuint& cbuf = cbufs[cbuf_cursor++]; + glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); + return {&cbuf, 0}; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 022e7bfa9..8c7145443 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -4,10 +4,12 @@ #pragma once +#include <array> #include <memory> #include "common/common_types.h" #include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -18,12 +20,14 @@ class System; namespace OpenGL { +class Device; class OGLStreamBuffer; class RasterizerOpenGL; class CachedBufferBlock; using Buffer = std::shared_ptr<CachedBufferBlock>; +using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; class CachedBufferBlock : public VideoCommon::BufferBlock { public: @@ -38,14 +42,18 @@ private: OGLBuffer gl_buffer{}; }; -class OGLBufferCache final : public 
VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> { +class OGLBufferCache final : public GenericBufferCache { public: explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - std::size_t stream_size); + const Device& device, std::size_t stream_size); ~OGLBufferCache(); const GLuint* GetEmptyBuffer(std::size_t) override; + void Acquire() noexcept { + cbuf_cursor = 0; + } + protected: Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; @@ -61,6 +69,14 @@ protected: void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) override; + + BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; + +private: + std::size_t cbuf_cursor = 0; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * + Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> + cbufs; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 64de7e425..b30d5be74 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -51,19 +51,24 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view } // Anonymous namespace Device::Device() { + const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::vector extensions = GetExtensions(); + const bool is_nvidia = vendor == "NVIDIA Corporation"; + uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; + has_shader_ballot = GLAD_GL_ARB_shader_ballot; 
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); has_precise_bug = TestPreciseBug(); + has_fast_buffer_sub_data = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); @@ -75,6 +80,7 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_warp_intrinsics = true; + has_shader_ballot = true; has_vertex_viewport_layer = true; has_image_load_formatted = true; has_variable_aoffi = true; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index bb273c3d6..6c86fe207 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -34,6 +34,10 @@ public: return has_warp_intrinsics; } + bool HasShaderBallot() const { + return has_shader_ballot; + } + bool HasVertexViewportLayer() const { return has_vertex_viewport_layer; } @@ -54,6 +58,10 @@ public: return has_precise_bug; } + bool HasFastBufferSubData() const { + return has_fast_buffer_sub_data; + } + private: static bool TestVariableAoffi(); static bool TestComponentIndexingBug(); @@ -64,11 +72,13 @@ private: u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_warp_intrinsics{}; + bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; + bool has_fast_buffer_sub_data{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6a17bed72..05f8e511b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ 
-67,9 +67,7 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, - system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { - OpenGLState::ApplyDefaultState(); - + system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); @@ -259,10 +257,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } - const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 - GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu, stage); + ubo.SetFromRegs(gpu); const auto [buffer, offset] = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); @@ -271,10 +267,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { Shader shader{shader_cache.GetStageProgram(program)}; - const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); - SetupDrawConstBuffers(stage_enum, shader); - SetupDrawGlobalMemory(stage_enum, shader); - const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)}; + // Stage indices are 0 - 5 + const auto stage = static_cast<Maxwell::ShaderStage>(index == 0 ? 
0 : index - 1); + SetupDrawConstBuffers(stage, shader); + SetupDrawGlobalMemory(stage, shader); + const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)}; const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); @@ -342,41 +339,6 @@ std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); } -template <typename Map, typename Interval> -static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); -} - -void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { - const u64 page_start{addr >> Memory::PAGE_BITS}; - const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; - - // Interval maps will erase segments if count reaches 0, so if delta is negative we have to - // subtract after iterating - const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end); - if (delta > 0) - cached_pages.add({pages_interval, delta}); - - for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { - const auto interval = pair.first & pages_interval; - const int count = pair.second; - - const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; - const u64 interval_size = interval_end_addr - interval_start_addr; - - if (delta > 0 && count == delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); - else if (delta < 0 && count == -delta) - Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); - else - ASSERT(count >= 0); - } - - if (delta < 0) - cached_pages.add({pages_interval, delta}); -} - void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& 
stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { shader_cache.LoadDiskCache(stop_loading, callback); @@ -412,7 +374,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); fbkey.colors[index] = std::move(color_surface); } - fbkey.colors_count = regs.rt_control.count; + fbkey.colors_count = static_cast<u16>(regs.rt_control.count); if (depth_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if @@ -595,6 +557,8 @@ void RasterizerOpenGL::DrawPrelude() { SyncPolygonOffset(); SyncAlphaTest(); + buffer_cache.Acquire(); + // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; @@ -916,7 +880,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); const auto alignment = device.GetUniformBufferAlignment(); - const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); + const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, + device.HasFastBufferSubData()); bind_ubo_pushbuffer.Push(cbuf, offset, size); } @@ -968,14 +933,14 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = [&]() { + const auto texture = [&] { if (!entry.IsBindless()) { return maxwell3d.GetStageTexture(stage, entry.GetOffset()); } - const auto cbuf = entry.GetBindlessCBuf(); - Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); - return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); + const Tegra::Texture::TextureHandle tex_handle = + 
maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); + return maxwell3d.GetTextureInfo(tex_handle); }(); if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) { @@ -998,14 +963,13 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = [&]() { + const auto texture = [&] { if (!entry.IsBindless()) { return compute.GetTexture(entry.GetOffset()); } - const auto cbuf = entry.GetBindlessCBuf(); - Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); - return compute.GetTextureInfo(tex_handle, entry.GetOffset()); + const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32( + Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset()); + return compute.GetTextureInfo(tex_handle); }(); if (SetupTexture(bindpoint, texture, entry)) { @@ -1043,14 +1007,13 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { const auto& entries = shader->GetShaderEntries().images; for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto tic = [&]() { + const auto tic = [&] { if (!entry.IsBindless()) { return compute.GetTexture(entry.GetOffset()).tic; } - const auto cbuf = entry.GetBindlessCBuf(); - Tegra::Texture::TextureHandle tex_handle; - tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); - return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; + const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32( + Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset()); + return compute.GetTextureInfo(tex_handle).tic; }(); SetupImage(bindpoint, tic, entry); } @@ -1091,6 +1054,15 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { } 
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; + + bool flip_y = false; + if (regs.viewport_transform[0].scale_y < 0.0) { + flip_y = !flip_y; + } + if (regs.screen_y_control.y_negate != 0) { + flip_y = !flip_y; + } + state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; } void RasterizerOpenGL::SyncClipEnabled( @@ -1113,28 +1085,14 @@ void RasterizerOpenGL::SyncClipCoef() { } void RasterizerOpenGL::SyncCullMode() { - auto& maxwell3d = system.GPU().Maxwell3D(); - - const auto& regs = maxwell3d.regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.cull.enabled = regs.cull.enabled != 0; if (state.cull.enabled) { - state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); - - const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 || - regs.viewport_transform[0].scale_y < 0.0f}; - - // If the GPU is configured to flip the rasterized triangles, then we need to flip the - // notion of front and back. Note: We flip the triangles when the value of the register is 0 - // because OpenGL already does it for us. 
- if (flip_triangles) { - if (state.cull.front_face == GL_CCW) - state.cull.front_face = GL_CW; - else if (state.cull.front_face == GL_CW) - state.cull.front_face = GL_CCW; - } } + + state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); } void RasterizerOpenGL::SyncPrimitiveRestart() { @@ -1340,7 +1298,9 @@ void RasterizerOpenGL::SyncPolygonOffset() { state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; - state.polygon_offset.units = regs.polygon_offset_units; + + // Hardware divides polygon offset units by two + state.polygon_offset.units = regs.polygon_offset_units / 2.0f; state.polygon_offset.factor = regs.polygon_offset_factor; state.polygon_offset.clamp = regs.polygon_offset_clamp; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..bd6fe5c3a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -13,12 +13,12 @@ #include <tuple> #include <utility> -#include <boost/icl/interval_map.hpp> #include <glad/glad.h> #include "common/common_types.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" @@ -51,7 +51,7 @@ namespace OpenGL { struct ScreenInfo; struct DrawParameters; -class RasterizerOpenGL : public VideoCore::RasterizerInterface { +class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info); @@ -72,7 +72,6 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; bool 
AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -227,9 +226,6 @@ private: AccelDraw accelerate_draw = AccelDraw::Disabled; OGLFramebuffer clear_framebuffer; - - using CachedPageMap = boost::icl::interval_map<u64, int>; - CachedPageMap cached_pages; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 42ca3b1bd..04a239a39 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,13 +3,16 @@ // Refer to the license.txt file included. #include <mutex> +#include <optional> +#include <string> #include <thread> +#include <unordered_set> #include <boost/functional/hash.hpp> #include "common/assert.h" -#include "common/hash.h" #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -21,18 +24,20 @@ namespace OpenGL { +using Tegra::Engines::ShaderType; +using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; +using VideoCommon::Shader::ShaderIR; + +namespace { // One UBO is always reserved for emulation values on staged shaders constexpr u32 STAGE_RESERVED_UBOS = 1; -struct UnspecializedShader { - std::string code; - GLShader::ShaderEntries entries; - ProgramType program_type; -}; +constexpr u32 STAGE_MAIN_OFFSET = 10; +constexpr u32 KERNEL_MAIN_OFFSET = 0; -namespace { +constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; /// Gets the address for the specified shader stage program GPUVAddr 
GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { @@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) return gpu.regs.code_address.CodeAddress() + shader_config.offset; } +/// Gets if the current instruction offset is a scheduler instruction +constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { + // Sched instructions appear once every 4 instructions. + constexpr std::size_t SchedPeriod = 4; + const std::size_t absolute_offset = offset - main_offset; + return (absolute_offset % SchedPeriod) == 0; +} + +/// Calculates the size of a program stream +std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { + constexpr std::size_t start_offset = 10; + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; + constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; + std::size_t offset = start_offset; + while (offset < program.size()) { + const u64 instruction = program[offset]; + if (!IsSchedInstruction(offset, start_offset)) { + if ((instruction & mask) == self_jumping_branch) { + // End on Maxwell's "nop" instruction + break; + } + if (instruction == 0) { + break; + } + } + offset++; + } + // The last instruction is included in the program size + return std::min(offset + 1, program.size()); +} + /// Gets the shader program code from memory for the specified address ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, const u8* host_ptr) { @@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g }); memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), program_code.size() * sizeof(u64)); + program_code.resize(CalculateProgramSize(program_code)); return program_code; } @@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) { } } -/// Gets if the current 
instruction offset is a scheduler instruction -constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - /// Describes primitive behavior on geometry shaders constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { switch (primitive_mode) { @@ -121,110 +152,151 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) { return {}; } -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { - constexpr std::size_t start_offset = 10; - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. - constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; - constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; - std::size_t offset = start_offset; - std::size_t size = start_offset * sizeof(u64); - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == self_jumping_branch) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - size += sizeof(u64); - offset++; - } - // The last instruction is included in the program size - return std::min(size + sizeof(u64), program.size() * sizeof(u64)); -} - /// Hashes one (or two) program streams u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, - const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { - if (size_a == 0) { - size_a = CalculateProgramSize(code); - } - u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); - if (program_type != ProgramType::VertexA) { - return unique_identifier; - } - // VertexA programs include two programs 
- - std::size_t seed = 0; - boost::hash_combine(seed, unique_identifier); - - if (size_b == 0) { - size_b = CalculateProgramSize(code_b); + const ProgramCode& code_b) { + u64 unique_identifier = boost::hash_value(code); + if (program_type == ProgramType::VertexA) { + // VertexA programs include two programs + boost::hash_combine(unique_identifier, boost::hash_value(code_b)); } - const u64 identifier_b = - Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); - boost::hash_combine(seed, identifier_b); - return static_cast<u64>(seed); + return unique_identifier; } /// Creates an unspecialized program from code streams -GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, - ProgramCode program_code, ProgramCode program_code_b) { - GLShader::ShaderSetup setup(program_code); - setup.program.size_a = CalculateProgramSize(program_code); - setup.program.size_b = 0; - if (program_type == ProgramType::VertexA) { - // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. - // Conventional HW does not support this, so we combine VertexA and VertexB into one - // stage here. - setup.SetProgramB(program_code_b); - setup.program.size_b = CalculateProgramSize(program_code_b); - } - setup.program.unique_identifier = GetUniqueIdentifier( - program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); - +std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, + const std::optional<ShaderIR>& ir_b) { switch (program_type) { case ProgramType::VertexA: case ProgramType::VertexB: - return GLShader::GenerateVertexShader(device, setup); + return GLShader::GenerateVertexShader(device, ir, ir_b ? 
&*ir_b : nullptr); case ProgramType::Geometry: - return GLShader::GenerateGeometryShader(device, setup); + return GLShader::GenerateGeometryShader(device, ir); case ProgramType::Fragment: - return GLShader::GenerateFragmentShader(device, setup); + return GLShader::GenerateFragmentShader(device, ir); case ProgramType::Compute: - return GLShader::GenerateComputeShader(device, setup); + return GLShader::GenerateComputeShader(device, ir); default: UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); return {}; } } -CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, - ProgramType program_type, const ProgramVariant& variant, - bool hint_retrievable = false) { +constexpr const char* GetProgramTypeName(ProgramType program_type) { + switch (program_type) { + case ProgramType::VertexA: + case ProgramType::VertexB: + return "VS"; + case ProgramType::TessellationControl: + return "TCS"; + case ProgramType::TessellationEval: + return "TES"; + case ProgramType::Geometry: + return "GS"; + case ProgramType::Fragment: + return "FS"; + case ProgramType::Compute: + return "CS"; + } + return "UNK"; +} + +Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { + switch (program_type) { + case ProgramType::VertexA: + case ProgramType::VertexB: + return Tegra::Engines::ShaderType::Vertex; + case ProgramType::TessellationControl: + return Tegra::Engines::ShaderType::TesselationControl; + case ProgramType::TessellationEval: + return Tegra::Engines::ShaderType::TesselationEval; + case ProgramType::Geometry: + return Tegra::Engines::ShaderType::Geometry; + case ProgramType::Fragment: + return Tegra::Engines::ShaderType::Fragment; + case ProgramType::Compute: + return Tegra::Engines::ShaderType::Compute; + } + UNREACHABLE(); + return {}; +} + +std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { + return fmt::format("{}{:016X}", GetProgramTypeName(program_type), 
unique_identifier); +} + +Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( + Core::System& system, ProgramType program_type) { + if (program_type == ProgramType::Compute) { + return system.GPU().KeplerCompute(); + } else { + return system.GPU().Maxwell3D(); + } +} + +std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) { + return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type), + GetConstBufferEngineInterface(system, program_type)); +} + +void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { + for (const auto& key : usage.keys) { + const auto [buffer, offset] = key.first; + locker.InsertKey(buffer, offset, key.second); + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + locker.InsertBoundSampler(offset, sampler); + } + for (const auto& [key, sampler] : usage.bindless_samplers) { + const auto [buffer, offset] = key; + locker.InsertBindlessSampler(buffer, offset, sampler); + } +} + +CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, + const ProgramCode& program_code, const ProgramCode& program_code_b, + const ProgramVariant& variant, ConstBufferLocker& locker, + bool hint_retrievable = false) { + LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); + + const bool is_compute = program_type == ProgramType::Compute; + const u32 main_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); + std::optional<ShaderIR> ir_b; + if (!program_code_b.empty()) { + ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); + } + const auto entries = GLShader::GetEntries(ir); + auto base_bindings{variant.base_bindings}; const auto primitive_mode{variant.primitive_mode}; const auto texture_buffer_usage{variant.texture_buffer_usage}; - std::string source = R"(#version 430 core + std::string source = fmt::format(R"(// {} +#version 430 core #extension GL_ARB_separate_shader_objects : enable -#extension GL_ARB_shader_viewport_layer_array : enable -#extension GL_EXT_shader_image_load_formatted : enable -#extension GL_NV_gpu_shader5 : enable -#extension GL_NV_shader_thread_group : enable -#extension GL_NV_shader_thread_shuffle : enable -)"; - if (program_type == ProgramType::Compute) { +)", + GetShaderId(unique_identifier, program_type)); + if (is_compute) { source += "#extension GL_ARB_compute_variable_group_size : require\n"; } + if (device.HasShaderBallot()) { + source += "#extension GL_ARB_shader_ballot : require\n"; + } + if (device.HasVertexViewportLayer()) { + source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; + } + if (device.HasImageLoadFormatted()) { + source += "#extension GL_EXT_shader_image_load_formatted : require\n"; + } + if (device.HasWarpIntrinsics()) { + source += "#extension GL_NV_gpu_shader5 : require\n" + "#extension GL_NV_shader_thread_group : require\n" + "#extension GL_NV_shader_thread_shuffle : require\n"; + } source += '\n'; - if (program_type != ProgramType::Compute) { + if (!is_compute) { source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); } @@ -268,7 +340,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn } source += '\n'; - source += code; + source += GenerateGLSL(device, program_type, ir, ir_b); OGLShader shader; 
shader.Create(source.c_str(), GetShaderType(program_type)); @@ -278,85 +350,99 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn return program; } -std::set<GLenum> GetSupportedFormats() { - std::set<GLenum> supported_formats; - +std::unordered_set<GLenum> GetSupportedFormats() { GLint num_formats{}; glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); std::vector<GLint> formats(num_formats); glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - for (const GLint format : formats) + std::unordered_set<GLenum> supported_formats; + for (const GLint format : formats) { supported_formats.insert(static_cast<GLenum>(format)); + } return supported_formats; } } // Anonymous namespace CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ProgramResult result) - : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, - unique_identifier{params.unique_identifier}, program_type{program_type}, - disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, - entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} + GLShader::ShaderEntries entries, ProgramCode program_code, + ProgramCode program_code_b) + : RasterizerCacheObject{params.host_ptr}, system{params.system}, + disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, + unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, + program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { + if (!params.precompiled_variants) { + return; + } + for (const auto& pair : *params.precompiled_variants) { + auto locker = MakeLocker(system, program_type); + const auto& usage = pair->first; + FillLocker(*locker, usage); + + std::unique_ptr<LockerVariant>* locker_variant = nullptr; + const auto it = + std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { + return 
variant->locker->HasEqualKeys(*locker); + }); + if (it == locker_variants.end()) { + locker_variant = &locker_variants.emplace_back(); + *locker_variant = std::make_unique<LockerVariant>(); + locker_variant->get()->locker = std::move(locker); + } else { + locker_variant = &*it; + } + locker_variant->get()->programs.emplace(usage.variant, pair->second); + } +} Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode&& program_code, - ProgramCode&& program_code_b) { - const auto code_size{CalculateProgramSize(program_code)}; - const auto code_size_b{CalculateProgramSize(program_code_b)}; - auto result{ - CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; - if (result.first.empty()) { - // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now - return {}; - } - + ProgramCode program_code, ProgramCode program_code_b) { params.disk_cache.SaveRaw(ShaderDiskCacheRaw( - params.unique_identifier, GetProgramType(program_type), - static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), - std::move(program_code), std::move(program_code_b))); - - return std::shared_ptr<CachedShader>( - new CachedShader(params, GetProgramType(program_type), std::move(result))); -} - -Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - GLShader::ProgramResult result) { + params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); + + ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)), + params.system.GPU().Maxwell3D()); + const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (!program_code_b.empty()) { + // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); + // } return std::shared_ptr<CachedShader>( - new 
CachedShader(params, GetProgramType(program_type), std::move(result))); + new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), + std::move(program_code), std::move(program_code_b))); } -Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { - auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; +Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { + params.disk_cache.SaveRaw( + ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); - const auto code_size{CalculateProgramSize(code)}; - params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, - static_cast<u32>(code_size / sizeof(u64)), 0, - std::move(code), {})); - - return std::shared_ptr<CachedShader>( - new CachedShader(params, ProgramType::Compute, std::move(result))); + ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, + params.system.GPU().KeplerCompute()); + const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); + return std::shared_ptr<CachedShader>(new CachedShader( + params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); } -Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, - GLShader::ProgramResult result) { - return std::shared_ptr<CachedShader>( - new CachedShader(params, ProgramType::Compute, std::move(result))); +Shader CachedShader::CreateFromCache(const ShaderParameters& params, + const UnspecializedShader& unspecialized) { + return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, + unspecialized.entries, unspecialized.code, + unspecialized.code_b)); } std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { - const auto [entry, is_cache_miss] = programs.try_emplace(variant); + UpdateVariant(); + + const auto [entry, is_cache_miss] = 
curr_variant->programs.try_emplace(variant); auto& program = entry->second; if (is_cache_miss) { - program = TryLoadProgram(variant); - if (!program) { - program = SpecializeShader(code, entries, program_type, variant); - disk_cache.SaveUsage(GetUsage(variant)); - } + program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, + variant, *curr_variant->locker); + disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } @@ -372,18 +458,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar return {program->handle, base_bindings}; } -CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { - const auto found = precompiled_programs.find(GetUsage(variant)); - if (found == precompiled_programs.end()) { - return {}; +void CachedShader::UpdateVariant() { + if (curr_variant && !curr_variant->locker->IsConsistent()) { + curr_variant = nullptr; + } + if (!curr_variant) { + for (auto& variant : locker_variants) { + if (variant->locker->IsConsistent()) { + curr_variant = variant.get(); + } + } + } + if (!curr_variant) { + auto& new_variant = locker_variants.emplace_back(); + new_variant = std::make_unique<LockerVariant>(); + new_variant->locker = MakeLocker(system, program_type); + curr_variant = new_variant.get(); } - return found->second; } -ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { +ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, + const ConstBufferLocker& locker) const { ShaderDiskCacheUsage usage; usage.unique_identifier = unique_identifier; usage.variant = variant; + usage.keys = locker.GetKeys(); + usage.bound_samplers = locker.GetBoundSamplers(); + usage.bindless_samplers = locker.GetBindlessSamplers(); return usage; } @@ -399,18 +500,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto [raws, 
shader_usages] = *transferable; - - auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); - - const auto supported_formats{GetSupportedFormats()}; - const auto unspecialized_shaders{ - GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; - if (stop_loading) { + if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { return; } - // Track if precompiled cache was altered during loading to know if we have to serialize the - // virtual precompiled cache file back to the hard drive + const auto dumps = disk_cache.LoadPrecompiled(); + const auto supported_formats = GetSupportedFormats(); + + // Track if precompiled cache was altered during loading to know if we have to + // serialize the virtual precompiled cache file back to the hard drive bool precompiled_cache_altered = false; // Inform the frontend about shader build initialization @@ -433,9 +531,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto& usage{shader_usages[i]}; - LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", - usage.unique_identifier, i, shader_usages.size()); - const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; const auto dump{dumps.find(usage)}; @@ -449,21 +544,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } if (!shader) { - shader = SpecializeShader(unspecialized.code, unspecialized.entries, - unspecialized.program_type, usage.variant, true); + auto locker{MakeLocker(system, unspecialized.program_type)}; + FillLocker(*locker, usage); + shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, + unspecialized.code, unspecialized.code_b, usage.variant, + *locker, true); } - std::scoped_lock lock(mutex); + std::scoped_lock lock{mutex}; if (callback) { callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, shader_usages.size()); } precompiled_programs.emplace(usage, std::move(shader)); + + 
// TODO(Rodrigo): Is there a better way to do this? + precompiled_variants[usage.unique_identifier].push_back( + precompiled_programs.find(usage)); } }; - const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; + const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; const std::size_t bucket_size{shader_usages.size() / num_workers}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); std::vector<std::thread> threads(num_workers); @@ -483,7 +585,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (compilation_failed) { // Invalidate the precompiled cache if a shader dumped shader was rejected disk_cache.InvalidatePrecompiled(); - dumps.clear(); precompiled_cache_altered = true; return; } @@ -491,8 +592,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before - // precompiling them + // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw + // before precompiling them for (std::size_t i = 0; i < shader_usages.size(); ++i) { const auto& usage{shader_usages[i]}; @@ -508,9 +609,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } -CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { +const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { + const auto it = precompiled_variants.find(unique_identifier); + return it == precompiled_variants.end() ? 
nullptr : &it->second; +} +CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( + const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { if (supported_formats.find(dump.binary_format) == supported_formats.end()) { LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); return {}; @@ -532,56 +637,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( return shader; } -std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( +bool ShaderCacheOpenGL::GenerateUnspecializedShaders( const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector<ShaderDiskCacheRaw>& raws, - const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { - std::unordered_map<u64, UnspecializedShader> unspecialized; - + const std::vector<ShaderDiskCacheRaw>& raws) { if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); } for (std::size_t i = 0; i < raws.size(); ++i) { if (stop_loading) { - return {}; + return false; } const auto& raw{raws[i]}; const u64 unique_identifier{raw.GetUniqueIdentifier()}; const u64 calculated_hash{ GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; if (unique_identifier != calculated_hash) { - LOG_ERROR( - Render_OpenGL, - "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", - raw.GetUniqueIdentifier(), calculated_hash); + LOG_ERROR(Render_OpenGL, + "Invalid hash in entry={:016x} (obtained hash={:016x}) - " + "removing shader cache", + raw.GetUniqueIdentifier(), calculated_hash); disk_cache.InvalidateTransferable(); - return {}; + return false; } - GLShader::ProgramResult result; - if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { - // If it's stored in the precompiled file, avoid decompiling it here - const auto& stored_decompiled{it->second}; - result = 
{stored_decompiled.code, stored_decompiled.entries}; - } else { - // Otherwise decompile the shader at boot and save the result to the decompiled file - result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), - raw.GetProgramCodeB()); - disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); - } - - precompiled_shaders.insert({unique_identifier, result}); - - unspecialized.insert( - {raw.GetUniqueIdentifier(), - {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); + const u32 main_offset = + raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); + const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (raw.HasProgramA()) { + // ir_b.emplace(raw.GetProgramCodeB(), main_offset); + // } + + UnspecializedShader unspecialized; + unspecialized.entries = GLShader::GetEntries(ir); + unspecialized.program_type = raw.GetProgramType(); + unspecialized.code = raw.GetProgramCode(); + unspecialized.code_b = raw.GetProgramCodeB(); + unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); if (callback) { callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); } } - return unspecialized; + return true; } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { @@ -590,37 +691,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { } auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(system, program)}; + const GPUVAddr address{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(address)}; Shader shader{TryGet(host_ptr)}; if (shader) { return 
last_shaders[static_cast<std::size_t>(program)] = shader; } // No shader found - create a new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; - if (is_program_a) { - const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); - } - - const auto unique_identifier = - GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); - const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, - host_ptr, unique_identifier}; - - const auto found = precompiled_shaders.find(unique_identifier); - if (found == precompiled_shaders.end()) { - shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code), - std::move(program_code_b)); + ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; + ProgramCode code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); + } + + const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); + const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); + const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; + const ShaderParameters params{system, disk_cache, precompiled_variants, device, + cpu_addr, host_ptr, unique_identifier}; + + const auto found = unspecialized_shaders.find(unique_identifier); + if (found == unspecialized_shaders.end()) { + shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), + std::move(code_b)); } else { - shader = 
CachedShader::CreateStageFromCache(params, program, found->second); + shader = CachedShader::CreateFromCache(params, found->second); } Register(shader); @@ -638,15 +737,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { // No kernel found - create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; + const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, - host_ptr, unique_identifier}; + const ShaderParameters params{system, disk_cache, precompiled_variants, device, + cpu_addr, host_ptr, unique_identifier}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found == precompiled_shaders.end()) { + const auto found = unspecialized_shaders.find(unique_identifier); + if (found == unspecialized_shaders.end()) { kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); } else { - kernel = CachedShader::CreateKernelFromCache(params, found->second); + kernel = CachedShader::CreateFromCache(params, found->second); } Register(kernel); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index de195cc5d..6bd7c9cf1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -8,9 +8,10 @@ #include <atomic> #include <bitset> #include <memory> -#include <set> +#include <string> #include <tuple> #include <unordered_map> +#include <unordered_set> #include <vector> #include <glad/glad.h> @@ -20,6 +21,8 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/shader/const_buffer_locker.h" 
+#include "video_core/shader/shader_ir.h" namespace Core { class System; @@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>; using CachedProgram = std::shared_ptr<OGLProgram>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; -using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; +using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; + +struct UnspecializedShader { + GLShader::ShaderEntries entries; + ProgramType program_type; + ProgramCode code; + ProgramCode code_b; +}; struct ShaderParameters { + Core::System& system; ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledPrograms& precompiled_programs; + const PrecompiledVariants* precompiled_variants; const Device& device; VAddr cpu_addr; u8* host_ptr; @@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject { public: static Shader CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode&& program_code, ProgramCode&& program_code_b); - - static Shader CreateStageFromCache(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - GLShader::ProgramResult result); + ProgramCode program_code, ProgramCode program_code_b); + static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); - static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); - - static Shader CreateKernelFromCache(const ShaderParameters& params, - GLShader::ProgramResult result); + static Shader CreateFromCache(const ShaderParameters& params, + const UnspecializedShader& unspecialized); VAddr GetCpuAddr() const override { return cpu_addr; } std::size_t GetSizeInBytes() const override { - return shader_length; + return program_code.size() * sizeof(u64); } /// Gets the shader entries for the shader @@ -83,24 +89,36 @@ public: std::tuple<GLuint, BaseBindings> 
GetProgramHandle(const ProgramVariant& variant); private: + struct LockerVariant { + std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; + std::unordered_map<ProgramVariant, CachedProgram> programs; + }; + explicit CachedShader(const ShaderParameters& params, ProgramType program_type, - GLShader::ProgramResult result); + GLShader::ShaderEntries entries, ProgramCode program_code, + ProgramCode program_code_b); - CachedProgram TryLoadProgram(const ProgramVariant& variant) const; + void UpdateVariant(); - ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; + ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, + const VideoCommon::Shader::ConstBufferLocker& locker) const; + + Core::System& system; + ShaderDiskCacheOpenGL& disk_cache; + const Device& device; VAddr cpu_addr{}; + u64 unique_identifier{}; ProgramType program_type{}; - ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledPrograms& precompiled_programs; GLShader::ShaderEntries entries; - std::string code; - std::size_t shader_length{}; - std::unordered_map<ProgramVariant, CachedProgram> programs; + ProgramCode program_code; + ProgramCode program_code_b; + + LockerVariant* curr_variant = nullptr; + std::vector<std::unique_ptr<LockerVariant>> locker_variants; }; class ShaderCacheOpenGL final : public RasterizerCache<Shader> { @@ -123,21 +141,26 @@ protected: void FlushObjectInner(const Shader& object) override {} private: - std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( - const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector<ShaderDiskCacheRaw>& raws, - const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled); + bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback, + const std::vector<ShaderDiskCacheRaw>& raws); CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, - const 
std::set<GLenum>& supported_formats); + const std::unordered_set<GLenum>& supported_formats); + + const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; + ShaderDiskCacheOpenGL disk_cache; - PrecompiledShaders precompiled_shaders; PrecompiledPrograms precompiled_programs; + std::unordered_map<u64, PrecompiledVariants> precompiled_variants; + + std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; + std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e6b36a0f2..4f2b49170 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -19,6 +19,7 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/shader/ast.h" #include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" @@ -242,6 +243,26 @@ constexpr const char* GetTypeString(Type type) { } } +constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { + switch (image_type) { + case Tegra::Shader::ImageType::Texture1D: + return "1D"; + case Tegra::Shader::ImageType::TextureBuffer: + return "Buffer"; + case Tegra::Shader::ImageType::Texture1DArray: + return "1DArray"; + case Tegra::Shader::ImageType::Texture2D: + return "2D"; + case Tegra::Shader::ImageType::Texture2DArray: + return "2DArray"; + case Tegra::Shader::ImageType::Texture3D: + return "3D"; + default: + UNREACHABLE(); + return "1D"; + } +} + /// Generates code to use for a swizzle operation. 
constexpr const char* GetSwizzle(u32 element) { constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; @@ -314,39 +335,24 @@ constexpr bool IsVertexShader(ProgramType stage) { return stage == ProgramType::VertexA || stage == ProgramType::VertexB; } +class ASTDecompiler; +class ExprDecompiler; + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, std::string suffix) : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} - void Decompile() { - DeclareVertex(); - DeclareGeometry(); - DeclareRegisters(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareSharedMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareConstantBuffers(); - DeclareGlobalMemory(); - DeclareSamplers(); - DeclarePhysicalAttributeReader(); - DeclareImages(); - - code.AddLine("void execute_{}() {{", suffix); - ++code.scope; - + void DecompileBranchMode() { // VM's program counter const auto first_address = ir.GetBasicBlocks().begin()->first; code.AddLine("uint jmp_to = {}U;", first_address); // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. 
+ constexpr u32 FLOW_STACK_SIZE = 20; if (!ir.IsFlowStackDisabled()) { - constexpr u32 FLOW_STACK_SIZE = 20; for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); @@ -372,38 +378,47 @@ public: code.AddLine("default: return;"); code.AddLine("}}"); - for (std::size_t i = 0; i < 2; ++i) { - --code.scope; - code.AddLine("}}"); + --code.scope; + code.AddLine("}}"); + } + + void DecompileAST(); + + void Decompile() { + DeclareVertex(); + DeclareGeometry(); + DeclareRegisters(); + DeclarePredicates(); + DeclareLocalMemory(); + DeclareInternalFlags(); + DeclareInputAttributes(); + DeclareOutputAttributes(); + DeclareConstantBuffers(); + DeclareGlobalMemory(); + DeclareSamplers(); + DeclarePhysicalAttributeReader(); + + code.AddLine("void execute_{}() {{", suffix); + ++code.scope; + + if (ir.IsDecompiled()) { + DecompileAST(); + } else { + DecompileBranchMode(); } + + --code.scope; + code.AddLine("}}"); } std::string GetResult() { return code.GetResult(); } - ShaderEntries GetShaderEntries() const { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& [offset, image] : ir.GetImages()) { - entries.images.emplace_back(image); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, - usage.is_read, usage.is_written); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - return entries; - } - private: + friend class ASTDecompiler; + friend class ExprDecompiler; + void DeclareVertex() { if (!IsVertexShader(stage)) return; @@ -720,27 +735,7 @@ private: void 
DeclareImages() { const auto& images{ir.GetImages()}; - for (const auto& [offset, image] : images) { - const char* image_type = [&] { - switch (image.GetType()) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } - }(); - + for (const auto& image : images) { std::string qualifier = "coherent volatile"; if (image.IsRead() && !image.IsWritten()) { qualifier += " readonly"; @@ -748,13 +743,10 @@ private: qualifier += " writeonly"; } - std::string format; - if (image.IsAtomic()) { - format = "r32ui, "; - } - + const char* format = image.IsAtomic() ? "r32ui, " : ""; + const char* type_declaration = GetImageTypeDeclaration(image.GetType()); code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format, - image.GetIndex(), qualifier, image_type, GetImage(image)); + image.GetIndex(), qualifier, type_declaration, GetImage(image)); } if (!images.empty()) { code.AddNewLine(); @@ -1135,7 +1127,7 @@ private: for (const auto& variant : extras) { if (const auto argument = std::get_if<TextureArgument>(&variant)) { expr += GenerateTextureArgument(*argument); - } else if (std::get_if<TextureAoffi>(&variant)) { + } else if (std::holds_alternative<TextureAoffi>(variant)) { expr += GenerateTextureAoffi(meta->aoffi); } else { UNREACHABLE(); @@ -1145,8 +1137,8 @@ private: return expr + ')'; } - std::string GenerateTextureArgument(TextureArgument argument) { - const auto [type, operand] = argument; + std::string GenerateTextureArgument(const TextureArgument& argument) { + const auto& [type, operand] = argument; if (operand == nullptr) { return {}; } @@ -1222,7 +1214,7 @@ private: std::string 
BuildImageValues(Operation operation) { constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; const std::size_t values_count{meta.values.size()}; std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); @@ -1387,6 +1379,26 @@ private: return GenerateUnary(operation, "float", Type::Float, type); } + Expression FSwizzleAdd(Operation operation) { + const std::string op_a = VisitOperand(operation, 0).AsFloat(); + const std::string op_b = VisitOperand(operation, 1).AsFloat(); + + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {fmt::format("{} + {}", op_a, op_b), Type::Float}; + } + + const std::string instr_mask = VisitOperand(operation, 2).AsUint(); + const std::string mask = code.GenerateTemporary(); + code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, + instr_mask); + + const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); + const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); + return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), + Type::Float}; + } + Expression ICastFloat(Operation operation) { return GenerateUnary(operation, "int", Type::Int, Type::Float); } @@ -1494,6 +1506,8 @@ private: case Tegra::Shader::HalfType::H1_H1: return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; } + UNREACHABLE(); + return {"0", Type::Int}; } Expression HMergeF32(Operation operation) { @@ -1676,7 +1690,7 @@ private: const auto type = meta->sampler.IsShadow() ? 
Type::Float : Type::Int; return {GenerateTexture(operation, "Gather", - {TextureArgument{type, meta->component}, TextureAoffi{}}) + + {TextureAoffi{}, TextureArgument{type, meta->component}}) + GetSwizzle(meta->element), Type::Float}; } @@ -1765,14 +1779,14 @@ private: return {"0", Type::Int}; } - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), Type::Uint}; } Expression ImageStore(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), BuildIntegerCoordinates(operation), BuildImageValues(operation)); return {}; @@ -1780,7 +1794,7 @@ private: template <const std::string_view& opname> Expression AtomicImage(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; ASSERT(meta.values.size() == 1); return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), @@ -1822,10 +1836,9 @@ private: return {}; } - Expression Exit(Operation operation) { + void PreExit() { if (stage != ProgramType::Fragment) { - code.AddLine("return;"); - return {}; + return; } const auto& used_registers = ir.GetRegisters(); const auto SafeGetRegister = [&](u32 reg) -> Expression { @@ -1857,7 +1870,10 @@ private: // already contains one past the last color register. 
code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); } + } + Expression Exit(Operation operation) { + PreExit(); code.AddLine("return;"); return {}; } @@ -1876,10 +1892,6 @@ private: Expression EmitVertex(Operation operation) { ASSERT_MSG(stage == ProgramType::Geometry, "EmitVertex is expected to be used in a geometry shader."); - - // If a geometry shader is attached, it will always flip (it's the last stage before - // fragment). For more info about flipping, refer to gl_shader_gen.cpp. - code.AddLine("gl_Position.xy *= viewport_flip.xy;"); code.AddLine("EmitVertex();"); return {}; } @@ -1887,14 +1899,12 @@ private: Expression EndPrimitive(Operation operation) { ASSERT_MSG(stage == ProgramType::Geometry, "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); return {}; } Expression YNegate(Operation operation) { - // Config pack's third value is Y_NEGATE's state. - return {"config_pack[2]", Type::Uint}; + return {"y_direction", Type::Float}; } template <u32 element> @@ -1946,34 +1956,24 @@ private: return Vote(operation, "allThreadsEqualNV"); } - template <const std::string_view& func> - Expression Shuffle(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsFloat(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); - // On a "single-thread" device we are either on the same thread or out of bounds. Both - // cases return the passed value. 
- return {value, Type::Float}; + Expression ThreadId(Operation operation) { + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {"0U", Type::Uint}; } - - const std::string index = VisitOperand(operation, 1).AsUint(); - const std::string width = VisitOperand(operation, 2).AsUint(); - return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; + return {"gl_SubGroupInvocationARB", Type::Uint}; } - template <const std::string_view& func> - Expression InRangeShuffle(Operation operation) { - const std::string index = VisitOperand(operation, 0).AsUint(); - const std::string width = VisitOperand(operation, 1).AsUint(); - if (!device.HasWarpIntrinsics()) { - // On a "single-thread" device we are only in bounds when the requested index is 0. - return {fmt::format("({} == 0U)", index), Type::Bool}; + Expression ShuffleIndexed(Operation operation) { + std::string value = VisitOperand(operation, 0).AsFloat(); + + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {std::move(value), Type::Float}; } - const std::string in_range = code.GenerateTemporary(); - code.AddLine("bool {};", in_range); - code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); - return {in_range, Type::Bool}; + const std::string index = VisitOperand(operation, 1).AsUint(); + return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; } struct Func final { @@ -1985,11 +1985,6 @@ private: static constexpr std::string_view Or = "Or"; static constexpr std::string_view Xor = "Xor"; static constexpr std::string_view Exchange = "Exchange"; - - static constexpr std::string_view ShuffleIndexed = "shuffleNV"; - static constexpr std::string_view ShuffleUp = "shuffleUpNV"; - static constexpr std::string_view ShuffleDown = "shuffleDownNV"; - static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; }; 
static constexpr std::array operation_decompilers = { @@ -2020,6 +2015,7 @@ private: &GLSLDecompiler::FTrunc, &GLSLDecompiler::FCastInteger<Type::Int>, &GLSLDecompiler::FCastInteger<Type::Uint>, + &GLSLDecompiler::FSwizzleAdd, &GLSLDecompiler::Add<Type::Int>, &GLSLDecompiler::Mul<Type::Int>, @@ -2155,15 +2151,8 @@ private: &GLSLDecompiler::VoteAny, &GLSLDecompiler::VoteEqual, - &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, - &GLSLDecompiler::Shuffle<Func::ShuffleUp>, - &GLSLDecompiler::Shuffle<Func::ShuffleDown>, - &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, - - &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, + &GLSLDecompiler::ThreadId, + &GLSLDecompiler::ShuffleIndexed, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); @@ -2229,7 +2218,7 @@ private: code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); } - std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { + std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { return fmt::format("{}_{}_{}", name, index, suffix); } @@ -2254,27 +2243,259 @@ private: ShaderWriter code; }; +std::string GetFlowVariable(u32 i) { + return fmt::format("flow_var_{}", i); +} + +class ExprDecompiler { +public: + explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} + + void operator()(const ExprAnd& expr) { + inner += "( "; + std::visit(*this, *expr.operand1); + inner += " && "; + std::visit(*this, *expr.operand2); + inner += ')'; + } + + void operator()(const ExprOr& expr) { + inner += "( "; + std::visit(*this, *expr.operand1); + inner += " || "; + std::visit(*this, *expr.operand2); + inner += ')'; + } + + void operator()(const ExprNot& expr) { + inner += '!'; + std::visit(*this, *expr.operand1); + } + + void operator()(const 
ExprPredicate& expr) { + const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); + inner += decomp.GetPredicate(pred); + } + + void operator()(const ExprCondCode& expr) { + const Node cc = decomp.ir.GetConditionCode(expr.cc); + std::string target; + + if (const auto pred = std::get_if<PredicateNode>(&*cc)) { + const auto index = pred->GetIndex(); + switch (index) { + case Tegra::Shader::Pred::NeverExecute: + target = "false"; + break; + case Tegra::Shader::Pred::UnusedIndex: + target = "true"; + break; + default: + target = decomp.GetPredicate(index); + break; + } + } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { + target = decomp.GetInternalFlag(flag->GetFlag()); + } else { + UNREACHABLE(); + } + inner += target; + } + + void operator()(const ExprVar& expr) { + inner += GetFlowVariable(expr.var_index); + } + + void operator()(const ExprBoolean& expr) { + inner += expr.value ? "true" : "false"; + } + + void operator()(VideoCommon::Shader::ExprGprEqual& expr) { + inner += + "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')'; + } + + const std::string& GetResult() const { + return inner; + } + +private: + std::string inner; + GLSLDecompiler& decomp; +}; + +class ASTDecompiler { +public: + explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} + + void operator()(const ASTProgram& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(const ASTIfThen& ast) { + ExprDecompiler expr_parser{decomp}; + std::visit(expr_parser, *ast.condition); + decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); + decomp.code.scope++; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + decomp.code.scope--; + decomp.code.AddLine("}}"); + } + + void operator()(const ASTIfElse& ast) { + decomp.code.AddLine("else {{"); + decomp.code.scope++; + ASTNode 
current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + decomp.code.scope--; + decomp.code.AddLine("}}"); + } + + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { + UNREACHABLE(); + } + + void operator()(const ASTBlockDecoded& ast) { + decomp.VisitBlock(ast.nodes); + } + + void operator()(const ASTVarSet& ast) { + ExprDecompiler expr_parser{decomp}; + std::visit(expr_parser, *ast.condition); + decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); + } + + void operator()(const ASTLabel& ast) { + decomp.code.AddLine("// Label_{}:", ast.index); + } + + void operator()([[maybe_unused]] const ASTGoto& ast) { + UNREACHABLE(); + } + + void operator()(const ASTDoWhile& ast) { + ExprDecompiler expr_parser{decomp}; + std::visit(expr_parser, *ast.condition); + decomp.code.AddLine("do {{"); + decomp.code.scope++; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + decomp.code.scope--; + decomp.code.AddLine("}} while({});", expr_parser.GetResult()); + } + + void operator()(const ASTReturn& ast) { + const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); + if (!is_true) { + ExprDecompiler expr_parser{decomp}; + std::visit(expr_parser, *ast.condition); + decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); + decomp.code.scope++; + } + if (ast.kills) { + decomp.code.AddLine("discard;"); + } else { + decomp.PreExit(); + decomp.code.AddLine("return;"); + } + if (!is_true) { + decomp.code.scope--; + decomp.code.AddLine("}}"); + } + } + + void operator()(const ASTBreak& ast) { + const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); + if (!is_true) { + ExprDecompiler expr_parser{decomp}; + std::visit(expr_parser, *ast.condition); + decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); + decomp.code.scope++; + } + decomp.code.AddLine("break;"); + if (!is_true) { + decomp.code.scope--; 
+ decomp.code.AddLine("}}"); + } + } + + void Visit(const ASTNode& node) { + std::visit(*this, *node->GetInnerData()); + } + +private: + GLSLDecompiler& decomp; +}; + +void GLSLDecompiler::DecompileAST() { + const u32 num_flow_variables = ir.GetASTNumVariables(); + for (u32 i = 0; i < num_flow_variables; i++) { + code.AddLine("bool {} = false;", GetFlowVariable(i)); + } + + ASTDecompiler decompiler{*this}; + decompiler.Visit(ir.GetASTProgram()); +} + } // Anonymous namespace +ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { + ShaderEntries entries; + for (const auto& cbuf : ir.GetConstantBuffers()) { + entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), + cbuf.first); + } + for (const auto& [base, usage] : ir.GetGlobalMemory()) { + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, + usage.is_written); + } + for (const auto& sampler : ir.GetSamplers()) { + entries.samplers.emplace_back(sampler); + } + for (const auto& image : ir.GetImages()) { + entries.images.emplace_back(image); + } + entries.clip_distances = ir.GetClipDistances(); + entries.shader_length = ir.GetLength(); + return entries; +} + std::string GetCommonDeclarations() { - return fmt::format( - "#define ftoi floatBitsToInt\n" - "#define ftou floatBitsToUint\n" - "#define itof intBitsToFloat\n" - "#define utof uintBitsToFloat\n\n" - "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" - " bvec2 is_nan1 = isnan(pair1);\n" - " bvec2 is_nan2 = isnan(pair2);\n" - " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " - "is_nan2.y);\n" - "}}\n\n"); + return R"(#define ftoi floatBitsToInt +#define ftou floatBitsToUint +#define itof intBitsToFloat +#define utof uintBitsToFloat + +bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { + bvec2 is_nan1 = isnan(pair1); + bvec2 is_nan2 = isnan(pair2); + return bvec2(comparison.x || is_nan1.x || 
is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); +} + +const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); +const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); +)"; } -ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, - const std::string& suffix) { +std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, + const std::string& suffix) { GLSLDecompiler decompiler(device, ir, stage, suffix); decompiler.Decompile(); - return {decompiler.GetResult(), decompiler.GetShaderEntries()}; + return decompiler.GetResult(); } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e538dc001..b1e75e6cc 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -34,10 +34,7 @@ enum class ProgramType : u32 { namespace OpenGL::GLShader { -struct ShaderEntries; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using ProgramResult = std::pair<std::string, ShaderEntries>; using SamplerEntry = VideoCommon::Shader::Sampler; using ImageEntry = VideoCommon::Shader::Image; @@ -85,17 +82,18 @@ private: struct ShaderEntries { std::vector<ConstBufferEntry> const_buffers; + std::vector<GlobalMemoryEntry> global_memory_entries; std::vector<SamplerEntry> samplers; - std::vector<SamplerEntry> bindless_samplers; std::vector<ImageEntry> images; - std::vector<GlobalMemoryEntry> global_memory_entries; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::size_t shader_length{}; }; +ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); + std::string GetCommonDeclarations(); -ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ProgramType stage, const std::string& suffix); +std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + 
ProgramType stage, const std::string& suffix); } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 6a7012b54..184a565e6 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -22,6 +22,29 @@ namespace OpenGL { +using VideoCommon::Shader::BindlessSamplerMap; +using VideoCommon::Shader::BoundSamplerMap; +using VideoCommon::Shader::KeyMap; + +namespace { + +struct ConstBufferKey { + u32 cbuf; + u32 offset; + u32 value; +}; + +struct BoundSamplerKey { + u32 offset; + Tegra::Engines::SamplerDescriptor sampler; +}; + +struct BindlessSamplerKey { + u32 cbuf; + u32 offset; + Tegra::Engines::SamplerDescriptor sampler; +}; + using ShaderCacheVersionHash = std::array<u8, 64>; enum class TransferableEntryKind : u32 { @@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 { Usage, }; -enum class PrecompiledEntryKind : u32 { - Decompiled, - Dump, -}; - -constexpr u32 NativeVersion = 4; +constexpr u32 NativeVersion = 5; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ShaderDiskCacheUsage) == 40); - -namespace { ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { return hash; } -} // namespace +} // Anonymous namespace ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - u32 program_code_size, u32 program_code_size_b, ProgramCode program_code, ProgramCode program_code_b) : unique_identifier{unique_identifier}, program_type{program_type}, - program_code_size{program_code_size}, program_code_size_b{program_code_size_b}, program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; @@ -90,15 +103,16 @@ bool 
ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(program_type)) != 1 || - file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { + file.WriteObject(static_cast<u32>(program_code.size())) != 1 || + file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) { return false; } - if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) + if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) return false; if (HasProgramA() && - file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { + file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { return false; } return true; @@ -112,44 +126,47 @@ std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskC ShaderDiskCacheOpenGL::LoadTransferable() { // Skip games without title id const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; - if (!Settings::values.use_disk_shader_cache || !has_title_id) + if (!Settings::values.use_disk_shader_cache || !has_title_id) { return {}; - tried_to_load = true; + } FileUtil::IOFile file(GetTransferablePath(), "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", GetTitleID()); + is_usable = true; return {}; } u32 version{}; if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { LOG_ERROR(Render_OpenGL, - "Failed to get transferable cache version for title id={} - skipping", + "Failed to get transferable cache version for title id={}, skipping", GetTitleID()); return {}; } if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); + LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); file.Close(); 
InvalidateTransferable(); + is_usable = true; return {}; } if (version > NativeVersion) { LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator - skipping"); + "of the emulator, skipping"); return {}; } // Version is valid, load the shaders + constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; std::vector<ShaderDiskCacheRaw> raws; std::vector<ShaderDiskCacheUsage> usages; while (file.Tell() < file.GetSize()) { TransferableEntryKind kind{}; if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { - LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); + LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); return {}; } @@ -157,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { case TransferableEntryKind::Raw: { ShaderDiskCacheRaw entry; if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); + LOG_ERROR(Render_OpenGL, error_loading); return {}; } transferable.insert({entry.GetUniqueIdentifier(), {}}); @@ -165,30 +182,62 @@ ShaderDiskCacheOpenGL::LoadTransferable() { break; } case TransferableEntryKind::Usage: { - ShaderDiskCacheUsage usage{}; - if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping"); + ShaderDiskCacheUsage usage; + + u32 num_keys{}; + u32 num_bound_samplers{}; + u32 num_bindless_samplers{}; + if (file.ReadArray(&usage.unique_identifier, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&num_bindless_samplers, 1) != 1) { + LOG_ERROR(Render_OpenGL, error_loading); return {}; } + + std::vector<ConstBufferKey> keys(num_keys); + std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); + std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); + if 
(file.ReadArray(keys.data(), keys.size()) != keys.size() || + file.ReadArray(bound_samplers.data(), bound_samplers.size()) != + bound_samplers.size() || + file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != + bindless_samplers.size()) { + LOG_ERROR(Render_OpenGL, error_loading); + return {}; + } + for (const auto& key : keys) { + usage.keys.insert({{key.cbuf, key.offset}, key.value}); + } + for (const auto& key : bound_samplers) { + usage.bound_samplers.emplace(key.offset, key.sampler); + } + for (const auto& key : bindless_samplers) { + usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + } + usages.push_back(std::move(usage)); break; } default: - LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", + LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", static_cast<u32>(kind)); return {}; } } - return {{raws, usages}}; + is_usable = true; + return {{std::move(raws), std::move(usages)}}; } -std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> +std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!IsUsable()) + if (!is_usable) { return {}; + } - FileUtil::IOFile file(GetPrecompiledPath(), "rb"); + std::string path = GetPrecompiledPath(); + FileUtil::IOFile file(path, "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", GetTitleID()); @@ -198,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { const auto result = LoadPrecompiledFile(file); if (!result) { LOG_INFO(Render_OpenGL, - "Failed to load precompiled cache for game with title id={} - removing", + "Failed to load precompiled cache for game with title id={}, removing", GetTitleID()); file.Close(); InvalidatePrecompiled(); @@ -207,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { return *result; } -std::optional<std::pair<std::unordered_map<u64, 
ShaderDiskCacheDecompiled>, ShaderDumpsMap>> +std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector<u8> compressed(file.GetSize()); @@ -227,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - PrecompiledEntryKind kind{}; - if (!LoadObjectFromPrecompiled(kind)) { + u32 num_keys{}; + u32 num_bound_samplers{}; + u32 num_bindless_samplers{}; + ShaderDiskCacheUsage usage; + if (!LoadObjectFromPrecompiled(usage.unique_identifier) || + !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(num_bound_samplers) || + !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; } - - switch (kind) { - case PrecompiledEntryKind::Decompiled: { - u64 unique_identifier{}; - if (!LoadObjectFromPrecompiled(unique_identifier)) { - return {}; - } - - auto entry = LoadDecompiledEntry(); - if (!entry) { - return {}; - } - decompiled.insert({unique_identifier, std::move(*entry)}); - break; - } - case PrecompiledEntryKind::Dump: { - ShaderDiskCacheUsage usage; - if (!LoadObjectFromPrecompiled(usage)) { - return {}; - } - - ShaderDiskCacheDump dump; - if (!LoadObjectFromPrecompiled(dump.binary_format)) { - return {}; - } - - u32 binary_length{}; - if (!LoadObjectFromPrecompiled(binary_length)) { - return {}; - } - - dump.binary.resize(binary_length); - if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { - return {}; - } - - dumps.insert({usage, dump}); - break; - } - default: + std::vector<ConstBufferKey> keys(num_keys); + std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); + 
std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); + if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || + !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != + bound_samplers.size() || + !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != + bindless_samplers.size()) { return {}; } - } - return {{decompiled, dumps}}; -} - -std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() { - u32 code_size{}; - if (!LoadObjectFromPrecompiled(code_size)) { - return {}; - } - - std::string code(code_size, '\0'); - if (!LoadArrayFromPrecompiled(code.data(), code.size())) { - return {}; - } - - ShaderDiskCacheDecompiled entry; - entry.code = std::move(code); - - u32 const_buffers_count{}; - if (!LoadObjectFromPrecompiled(const_buffers_count)) { - return {}; - } - - for (u32 i = 0; i < const_buffers_count; ++i) { - u32 max_offset{}; - u32 index{}; - bool is_indirect{}; - if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(is_indirect)) { - return {}; + for (const auto& key : keys) { + usage.keys.insert({{key.cbuf, key.offset}, key.value}); } - entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); - } - - u32 samplers_count{}; - if (!LoadObjectFromPrecompiled(samplers_count)) { - return {}; - } - - for (u32 i = 0; i < samplers_count; ++i) { - u64 offset{}; - u64 index{}; - u32 type{}; - bool is_array{}; - bool is_shadow{}; - bool is_bindless{}; - if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || - !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { - return {}; + for (const auto& key : bound_samplers) { + usage.bound_samplers.emplace(key.offset, key.sampler); } - entry.entries.samplers.emplace_back( - static_cast<std::size_t>(offset), 
static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); - } - - u32 images_count{}; - if (!LoadObjectFromPrecompiled(images_count)) { - return {}; - } - for (u32 i = 0; i < images_count; ++i) { - u64 offset{}; - u64 index{}; - u32 type{}; - u8 is_bindless{}; - u8 is_written{}; - u8 is_read{}; - u8 is_atomic{}; - if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || - !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || - !LoadObjectFromPrecompiled(is_atomic)) { - return {}; + for (const auto& key : bindless_samplers) { + usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); } - entry.entries.images.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, - is_read != 0, is_atomic != 0); - } - u32 global_memory_count{}; - if (!LoadObjectFromPrecompiled(global_memory_count)) { - return {}; - } - for (u32 i = 0; i < global_memory_count; ++i) { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_read{}; - bool is_written{}; - if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || - !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { + ShaderDiskCacheDump dump; + if (!LoadObjectFromPrecompiled(dump.binary_format)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, - is_written); - } - for (auto& clip_distance : entry.entries.clip_distances) { - if (!LoadObjectFromPrecompiled(clip_distance)) { + u32 binary_length{}; + if (!LoadObjectFromPrecompiled(binary_length)) { return {}; } - } - u64 shader_length{}; - if (!LoadObjectFromPrecompiled(shader_length)) { - return {}; - } - entry.entries.shader_length = static_cast<std::size_t>(shader_length); - - 
return entry; -} - -bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries) { - if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) || - !SaveObjectToPrecompiled(unique_identifier) || - !SaveObjectToPrecompiled(static_cast<u32>(code.size())) || - !SaveArrayToPrecompiled(code.data(), code.size())) { - return false; - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) { - return false; - } - for (const auto& cbuf : entries.const_buffers) { - if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || - !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || - !SaveObjectToPrecompiled(cbuf.IsIndirect())) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) { - return false; - } - for (const auto& sampler : entries.samplers) { - if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || - !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || - !SaveObjectToPrecompiled(sampler.IsArray()) || - !SaveObjectToPrecompiled(sampler.IsShadow()) || - !SaveObjectToPrecompiled(sampler.IsBindless())) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) { - return false; - } - for (const auto& image : entries.images) { - if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || - !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 
1 : 0))) { - return false; - } - } - - if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { - return false; - } - for (const auto& gmem : entries.global_memory_entries) { - if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || - !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || - !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { - return false; - } - } - - for (const bool clip_distance : entries.clip_distances) { - if (!SaveObjectToPrecompiled(clip_distance)) { - return false; + dump.binary.resize(binary_length); + if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { + return {}; } - } - if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { - return false; + dumps.emplace(std::move(usage), dump); } - - return true; + return dumps; } void ShaderDiskCacheOpenGL::InvalidateTransferable() { @@ -479,8 +346,9 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { } void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { - if (!IsUsable()) + if (!is_usable) { return; + } const u64 id = entry.GetUniqueIdentifier(); if (transferable.find(id) != transferable.end()) { @@ -489,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { } FileUtil::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) + if (!file.IsOpen()) { return; + } if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); + LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); file.Close(); InvalidateTransferable(); return; @@ -501,8 +370,9 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { } void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { - if (!IsUsable()) + if (!is_usable) { return; + } const auto it = 
transferable.find(usage.unique_identifier); ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); @@ -517,35 +387,54 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { FileUtil::IOFile file = AppendTransferableFile(); if (!file.IsOpen()) return; - - if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) { - LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing"); + const auto Close = [&] { + LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); file.Close(); InvalidateTransferable(); + }; + + if (file.WriteObject(TransferableEntryKind::Usage) != 1 || + file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || + file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || + file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { + Close(); return; } + for (const auto& [pair, value] : usage.keys) { + const auto [cbuf, offset] = pair; + if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { + Close(); + return; + } + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { + Close(); + return; + } + } + for (const auto& [pair, sampler] : usage.bindless_samplers) { + const auto [cbuf, offset] = pair; + if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { + Close(); + return; + } + } } -void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries) { - if (!IsUsable()) +void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { + if (!is_usable) { return; + } + // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header + // when writing the dump. 
This should be done the moment I get access to write to the virtual + // file. if (precompiled_cache_virtual_file.GetSize() == 0) { SavePrecompiledHeaderToVirtualPrecompiledCache(); } - if (!SaveDecompiledFile(unique_identifier, code, entries)) { - LOG_ERROR(Render_OpenGL, - "Failed to save decompiled entry to the precompiled file - removing"); - InvalidatePrecompiled(); - } -} - -void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { - if (!IsUsable()) - return; - GLint binary_length{}; glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); @@ -553,25 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector<u8> binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || - !SaveObjectToPrecompiled(usage) || - !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || - !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", + const auto Close = [&] { + LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", usage.unique_identifier); InvalidatePrecompiled(); + }; + + if (!SaveObjectToPrecompiled(usage.unique_identifier) || + !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || + !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || + !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { + Close(); return; } -} - -bool ShaderDiskCacheOpenGL::IsUsable() const { - return tried_to_load && Settings::values.use_disk_shader_cache; + for (const auto& [pair, value] : usage.keys) { + const auto [cbuf, offset] = pair; + if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, 
offset, value}) != 1) { + Close(); + return; + } + } + for (const auto& [offset, sampler] : usage.bound_samplers) { + if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { + Close(); + return; + } + } + for (const auto& [pair, sampler] : usage.bindless_samplers) { + const auto [cbuf, offset] = pair; + if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { + Close(); + return; + } + } + if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || + !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || + !SaveArrayToPrecompiled(binary.data(), binary.size())) { + Close(); + } } FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) + if (!EnsureDirectories()) { return {}; + } const auto transferable_path{GetTransferablePath()}; const bool existed = FileUtil::Exists(transferable_path); @@ -603,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { precompiled_cache_virtual_file_offset = 0; - const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector<u8>& compressed = + const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); + const std::vector<u8> compressed = Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); const auto precompiled_path{GetPrecompiledPath()}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index cc8bbd61e..db23ada93 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -8,6 +8,7 @@ #include <optional> #include <string> #include <tuple> +#include <type_traits> #include <unordered_map> #include <unordered_set> #include <utility> @@ -19,6 +20,7 @@ #include "common/common_types.h" #include 
"core/file_sys/vfs_vector.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/shader/const_buffer_locker.h" namespace Core { class System; @@ -53,6 +55,7 @@ struct BaseBindings { return !operator==(rhs); } }; +static_assert(std::is_trivially_copyable_v<BaseBindings>); /// Describes the different variants a single program can be compiled. struct ProgramVariant { @@ -70,13 +73,20 @@ struct ProgramVariant { } }; +static_assert(std::is_trivially_copyable_v<ProgramVariant>); + /// Describes how a shader is used. struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + VideoCommon::Shader::KeyMap keys; + VideoCommon::Shader::BoundSamplerMap bound_samplers; + VideoCommon::Shader::BindlessSamplerMap bindless_samplers; bool operator==(const ShaderDiskCacheUsage& rhs) const { - return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); + return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == + std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, + rhs.bindless_samplers); } bool operator!=(const ShaderDiskCacheUsage& rhs) const { @@ -123,8 +133,7 @@ namespace OpenGL { class ShaderDiskCacheRaw { public: explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - u32 program_code_size, u32 program_code_size_b, - ProgramCode program_code, ProgramCode program_code_b); + ProgramCode program_code, ProgramCode program_code_b = {}); ShaderDiskCacheRaw(); ~ShaderDiskCacheRaw(); @@ -155,22 +164,14 @@ public: private: u64 unique_identifier{}; ProgramType program_type{}; - u32 program_code_size{}; - u32 program_code_size_b{}; ProgramCode program_code; ProgramCode program_code_b; }; -/// Contains decompiled data from a shader -struct ShaderDiskCacheDecompiled { - std::string code; - GLShader::ShaderEntries entries; -}; - /// Contains an OpenGL dumped binary program struct ShaderDiskCacheDump { - GLenum binary_format; + GLenum 
binary_format{}; std::vector<u8> binary; }; @@ -184,9 +185,7 @@ public: LoadTransferable(); /// Loads current game's precompiled cache. Invalidates on failure. - std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> - LoadPrecompiled(); + std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); /// Removes the transferable (and precompiled) cache file. void InvalidateTransferable(); @@ -200,10 +199,6 @@ public: /// Saves shader usage to the transferable file. Does not check for collisions. void SaveUsage(const ShaderDiskCacheUsage& usage); - /// Saves a decompiled entry to the precompiled file. Does not check for collisions. - void SaveDecompiled(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries); - /// Saves a dump entry to the precompiled file. Does not check for collisions. void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); @@ -212,21 +207,9 @@ public: private: /// Loads the transferable cache. Returns empty on failure. - std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> + std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> LoadPrecompiledFile(FileUtil::IOFile& file); - /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on - /// failure. - std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(); - - /// Saves a decompiled entry to the passed file. Returns true on success. 
- bool SaveDecompiledFile(u64 unique_identifier, const std::string& code, - const GLShader::ShaderEntries& entries); - - /// Returns if the cache can be used - bool IsUsable() const; - /// Opens current game's transferable file and write it's header if it doesn't exist FileUtil::IOFile AppendTransferableFile() const; @@ -297,7 +280,7 @@ private: std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; // The cache has been loaded at boot - bool tried_to_load{}; + bool is_usable{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 3a8d9e1da..af17216bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -11,93 +11,56 @@ namespace OpenGL::GLShader { using Tegra::Engines::Maxwell3D; +using VideoCommon::Shader::CompileDepth; +using VideoCommon::Shader::CompilerSettings; using VideoCommon::Shader::ProgramCode; using VideoCommon::Shader::ShaderIR; -static constexpr u32 PROGRAM_OFFSET = 10; -static constexpr u32 COMPUTE_OFFSET = 0; - -ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: VS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { + std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding + float y_direction; }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); - const auto stage = setup.IsDualProgram() ? 
ProgramType::VertexA : ProgramType::VertexB; - ProgramResult program = Decompile(device, program_ir, stage, "vertex"); - out += program.first; - - if (setup.IsDualProgram()) { - const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); - ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); - out += program_b.first; + const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; + out += Decompile(device, ir, stage, "vertex"); + if (ir_b) { + out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); } out += R"( void main() { execute_vertex(); )"; - - if (setup.IsDualProgram()) { + if (ir_b) { out += " execute_vertex_b();"; } - - out += R"( - - // Set Position Y direction - gl_Position.y *= utof(config_pack[2]); - // Check if the flip stage is VertexB - // Config pack's second value is flip_stage - if (config_pack[1] == 1) { - // Viewport can be flipped, which is unsupported by glViewport - gl_Position.xy *= viewport_flip.xy; - } -})"; - - return {std::move(out), std::move(program.second)}; + out += "}\n"; + return out; } -ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: GS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding + float y_direction; }; )"; - - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); - ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); - out += program.first; + out += Decompile(device, ir, ProgramType::Geometry, "geometry"); out += 
R"( void main() { execute_geometry(); -};)"; - - return {std::move(out), std::move(program.second)}; +} +)"; + return out; } -ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: FS" + id + "\n\n"; - out += GetCommonDeclarations(); - +std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); out += R"( layout (location = 0) out vec4 FragColor0; layout (location = 1) out vec4 FragColor1; @@ -109,40 +72,29 @@ layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding + float y_direction; }; )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); - ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); - out += program.first; + out += Decompile(device, ir, ProgramType::Fragment, "fragment"); out += R"( void main() { execute_fragment(); } - )"; - return {std::move(out), std::move(program.second)}; + return out; } -ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { - const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - - std::string out = "// Shader Unique Id: CS" + id + "\n\n"; - out += GetCommonDeclarations(); - - const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); - ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); - out += program.first; - +std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { + std::string out = GetCommonDeclarations(); + out += Decompile(device, ir, ProgramType::Compute, "compute"); out += R"( void main() { 
execute_compute(); } )"; - return {std::move(out), std::move(program.second)}; + return out; } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3833e88ab..cba2be9f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -17,44 +17,18 @@ class Device; namespace OpenGL::GLShader { using VideoCommon::Shader::ProgramCode; - -struct ShaderSetup { - explicit ShaderSetup(ProgramCode program_code) { - program.code = std::move(program_code); - } - - struct { - ProgramCode code; - ProgramCode code_b; // Used for dual vertex shaders - u64 unique_identifier; - std::size_t size_a; - std::size_t size_b; - } program; - - /// Used in scenarios where we have a dual vertex shaders - void SetProgramB(ProgramCode program_b) { - program.code_b = std::move(program_b); - has_program_b = true; - } - - bool IsDualProgram() const { - return has_program_b; - } - -private: - bool has_program_b{}; -}; +using VideoCommon::Shader::ShaderIR; /// Generates the GLSL vertex shader program source code for the given VS program -ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); +std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); /// Generates the GLSL geometry shader program source code for the given GS program -ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); +std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); /// Generates the GLSL fragment shader program source code for the given FS program -ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); +std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); /// Generates the GLSL compute shader program source code for the given CS program -ProgramResult GenerateComputeShader(const Device& device, const 
ShaderSetup& setup); +std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index b05f90f20..75d3fac04 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -40,27 +40,11 @@ void ProgramManager::UpdatePipeline() { old_state = current_state; } -void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) { +void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) { const auto& regs = maxwell.regs; - const auto& state = maxwell.state; - - // TODO(bunnei): Support more than one viewport - viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; - viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; - - instance_id = state.current_instance; - - // Assign in which stage the position has to be flipped - // (the last stage before the fragment shader). - constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); - if (maxwell.regs.shader_config[geometry_index].enable) { - flip_stage = geometry_index; - } else { - flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); - } // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f; + y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 6961e702a..3703e7018 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -18,17 +18,12 @@ namespace OpenGL::GLShader { /// @note Always keep a vec4 at the end. 
The GL spec is not clear whether the alignment at /// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. /// Not following that rule will cause problems on some AMD drivers. -struct MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage); - - alignas(16) GLvec4 viewport_flip; - struct alignas(16) { - GLuint instance_id; - GLuint flip_stage; - GLfloat y_direction; - }; +struct alignas(16) MaxwellUniformData { + void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); + + GLfloat y_direction; }; -static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index bf86b5a0b..ccbe5912e 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include <algorithm> #include <iterator> #include <glad/glad.h> #include "common/assert.h" @@ -69,147 +70,29 @@ void Enable(GLenum cap, GLuint index, bool enable) { } void Enable(GLenum cap, bool& current_value, bool new_value) { - if (UpdateValue(current_value, new_value)) + if (UpdateValue(current_value, new_value)) { Enable(cap, new_value); + } } void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { - if (UpdateValue(current_value, new_value)) + if (UpdateValue(current_value, new_value)) { Enable(cap, index, new_value); -} - -} // namespace - -OpenGLState::OpenGLState() { - // These all match default OpenGL values - framebuffer_srgb.enabled = false; - - multisample_control.alpha_to_coverage = false; - multisample_control.alpha_to_one = false; - - cull.enabled = false; - cull.mode = GL_BACK; - cull.front_face = GL_CCW; - - depth.test_enabled = false; - depth.test_func = GL_LESS; - depth.write_mask = GL_TRUE; - - primitive_restart.enabled = false; - primitive_restart.index = 0; - - for (auto& item : color_mask) { - item.red_enabled = GL_TRUE; - item.green_enabled = GL_TRUE; - item.blue_enabled = GL_TRUE; - item.alpha_enabled = GL_TRUE; - } - - const auto ResetStencil = [](auto& config) { - config.test_func = GL_ALWAYS; - config.test_ref = 0; - config.test_mask = 0xFFFFFFFF; - config.write_mask = 0xFFFFFFFF; - config.action_depth_fail = GL_KEEP; - config.action_depth_pass = GL_KEEP; - config.action_stencil_fail = GL_KEEP; - }; - stencil.test_enabled = false; - ResetStencil(stencil.front); - ResetStencil(stencil.back); - - for (auto& item : viewports) { - item.x = 0; - item.y = 0; - item.width = 0; - item.height = 0; - item.depth_range_near = 0.0f; - item.depth_range_far = 1.0f; - item.scissor.enabled = false; - item.scissor.x = 0; - item.scissor.y = 0; - item.scissor.width = 0; - item.scissor.height = 0; } +} - for (auto& item : blend) { - item.enabled = true; - item.rgb_equation = GL_FUNC_ADD; - item.a_equation = GL_FUNC_ADD; - 
item.src_rgb_func = GL_ONE; - item.dst_rgb_func = GL_ZERO; - item.src_a_func = GL_ONE; - item.dst_a_func = GL_ZERO; - } - - independant_blend.enabled = false; - - blend_color.red = 0.0f; - blend_color.green = 0.0f; - blend_color.blue = 0.0f; - blend_color.alpha = 0.0f; - - logic_op.enabled = false; - logic_op.operation = GL_COPY; - - draw.read_framebuffer = 0; - draw.draw_framebuffer = 0; - draw.vertex_array = 0; - draw.shader_program = 0; - draw.program_pipeline = 0; - - clip_distance = {}; - - point.size = 1; - - fragment_color_clamp.enabled = false; - - depth_clamp.far_plane = false; - depth_clamp.near_plane = false; - - polygon_offset.fill_enable = false; - polygon_offset.line_enable = false; - polygon_offset.point_enable = false; - polygon_offset.factor = 0.0f; - polygon_offset.units = 0.0f; - polygon_offset.clamp = 0.0f; +} // Anonymous namespace - alpha_test.enabled = false; - alpha_test.func = GL_ALWAYS; - alpha_test.ref = 0.0f; -} +OpenGLState::OpenGLState() = default; void OpenGLState::SetDefaultViewports() { - for (auto& item : viewports) { - item.x = 0; - item.y = 0; - item.width = 0; - item.height = 0; - item.depth_range_near = 0.0f; - item.depth_range_far = 1.0f; - item.scissor.enabled = false; - item.scissor.x = 0; - item.scissor.y = 0; - item.scissor.width = 0; - item.scissor.height = 0; - } + viewports.fill(Viewport{}); depth_clamp.far_plane = false; depth_clamp.near_plane = false; } -void OpenGLState::ApplyDefaultState() { - glEnable(GL_BLEND); - glDisable(GL_FRAMEBUFFER_SRGB); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glDisable(GL_PRIMITIVE_RESTART); - glDisable(GL_STENCIL_TEST); - glDisable(GL_COLOR_LOGIC_OP); - glDisable(GL_SCISSOR_TEST); -} - -void OpenGLState::ApplyFramebufferState() const { +void OpenGLState::ApplyFramebufferState() { if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); } @@ -218,52 +101,52 @@ void 
OpenGLState::ApplyFramebufferState() const { } } -void OpenGLState::ApplyVertexArrayState() const { +void OpenGLState::ApplyVertexArrayState() { if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { glBindVertexArray(draw.vertex_array); } } -void OpenGLState::ApplyShaderProgram() const { +void OpenGLState::ApplyShaderProgram() { if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { glUseProgram(draw.shader_program); } } -void OpenGLState::ApplyProgramPipeline() const { +void OpenGLState::ApplyProgramPipeline() { if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { glBindProgramPipeline(draw.program_pipeline); } } -void OpenGLState::ApplyClipDistances() const { +void OpenGLState::ApplyClipDistances() { for (std::size_t i = 0; i < clip_distance.size(); ++i) { Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], clip_distance[i]); } } -void OpenGLState::ApplyPointSize() const { +void OpenGLState::ApplyPointSize() { if (UpdateValue(cur_state.point.size, point.size)) { glPointSize(point.size); } } -void OpenGLState::ApplyFragmentColorClamp() const { +void OpenGLState::ApplyFragmentColorClamp() { if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, fragment_color_clamp.enabled ? 
GL_TRUE : GL_FALSE); } } -void OpenGLState::ApplyMultisample() const { +void OpenGLState::ApplyMultisample() { Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, multisample_control.alpha_to_coverage); Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, multisample_control.alpha_to_one); } -void OpenGLState::ApplyDepthClamp() const { +void OpenGLState::ApplyDepthClamp() { if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { return; @@ -276,7 +159,7 @@ void OpenGLState::ApplyDepthClamp() const { Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); } -void OpenGLState::ApplySRgb() const { +void OpenGLState::ApplySRgb() { if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) return; cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; @@ -287,7 +170,7 @@ void OpenGLState::ApplySRgb() const { } } -void OpenGLState::ApplyCulling() const { +void OpenGLState::ApplyCulling() { Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); if (UpdateValue(cur_state.cull.mode, cull.mode)) { @@ -299,7 +182,12 @@ void OpenGLState::ApplyCulling() const { } } -void OpenGLState::ApplyColorMask() const { +void OpenGLState::ApplyColorMask() { + if (!dirty.color_mask) { + return; + } + dirty.color_mask = false; + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { const auto& updated = color_mask[i]; auto& current = cur_state.color_mask[i]; @@ -314,7 +202,7 @@ void OpenGLState::ApplyColorMask() const { } } -void OpenGLState::ApplyDepth() const { +void OpenGLState::ApplyDepth() { Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); if (cur_state.depth.test_func != depth.test_func) { @@ -328,7 +216,7 @@ void OpenGLState::ApplyDepth() const { } } -void OpenGLState::ApplyPrimitiveRestart() const { +void OpenGLState::ApplyPrimitiveRestart() { Enable(GL_PRIMITIVE_RESTART, 
cur_state.primitive_restart.enabled, primitive_restart.enabled); if (cur_state.primitive_restart.index != primitive_restart.index) { @@ -337,7 +225,12 @@ void OpenGLState::ApplyPrimitiveRestart() const { } } -void OpenGLState::ApplyStencilTest() const { +void OpenGLState::ApplyStencilTest() { + if (!dirty.stencil_state) { + return; + } + dirty.stencil_state = false; + Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { @@ -366,7 +259,7 @@ void OpenGLState::ApplyStencilTest() const { ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); } -void OpenGLState::ApplyViewport() const { +void OpenGLState::ApplyViewport() { for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { const auto& updated = viewports[i]; auto& current = cur_state.viewports[i]; @@ -403,7 +296,7 @@ void OpenGLState::ApplyViewport() const { } } -void OpenGLState::ApplyGlobalBlending() const { +void OpenGLState::ApplyGlobalBlending() { const Blend& updated = blend[0]; Blend& current = cur_state.blend[0]; @@ -427,7 +320,7 @@ void OpenGLState::ApplyGlobalBlending() const { } } -void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { +void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) { const Blend& updated = blend[target]; Blend& current = cur_state.blend[target]; @@ -451,7 +344,12 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { } } -void OpenGLState::ApplyBlending() const { +void OpenGLState::ApplyBlending() { + if (!dirty.blend_state) { + return; + } + dirty.blend_state = false; + if (independant_blend.enabled) { const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { @@ -470,7 +368,7 @@ void OpenGLState::ApplyBlending() const { } } -void OpenGLState::ApplyLogicOp() const { +void 
OpenGLState::ApplyLogicOp() { Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { @@ -478,7 +376,12 @@ void OpenGLState::ApplyLogicOp() const { } } -void OpenGLState::ApplyPolygonOffset() const { +void OpenGLState::ApplyPolygonOffset() { + if (!dirty.polygon_offset) { + return; + } + dirty.polygon_offset = false; + Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, polygon_offset.fill_enable); Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, @@ -499,7 +402,7 @@ void OpenGLState::ApplyPolygonOffset() const { } } -void OpenGLState::ApplyAlphaTest() const { +void OpenGLState::ApplyAlphaTest() { Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled); if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref), std::tie(alpha_test.func, alpha_test.ref))) { @@ -507,19 +410,25 @@ void OpenGLState::ApplyAlphaTest() const { } } -void OpenGLState::ApplyTextures() const { +void OpenGLState::ApplyClipControl() { + if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) { + glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE); + } +} + +void OpenGLState::ApplyTextures() { if (const auto update = UpdateArray(cur_state.textures, textures)) { glBindTextures(update->first, update->second, textures.data() + update->first); } } -void OpenGLState::ApplySamplers() const { +void OpenGLState::ApplySamplers() { if (const auto update = UpdateArray(cur_state.samplers, samplers)) { glBindSamplers(update->first, update->second, samplers.data() + update->first); } } -void OpenGLState::ApplyImages() const { +void OpenGLState::ApplyImages() { if (const auto update = UpdateArray(cur_state.images, images)) { glBindImageTextures(update->first, update->second, images.data() + update->first); } @@ -535,33 +444,22 @@ void OpenGLState::Apply() { ApplyPointSize(); ApplyFragmentColorClamp(); ApplyMultisample(); - if 
(dirty.color_mask) { - ApplyColorMask(); - dirty.color_mask = false; - } + ApplyColorMask(); ApplyDepthClamp(); ApplyViewport(); - if (dirty.stencil_state) { - ApplyStencilTest(); - dirty.stencil_state = false; - } + ApplyStencilTest(); ApplySRgb(); ApplyCulling(); ApplyDepth(); ApplyPrimitiveRestart(); - if (dirty.blend_state) { - ApplyBlending(); - dirty.blend_state = false; - } + ApplyBlending(); ApplyLogicOp(); ApplyTextures(); ApplySamplers(); ApplyImages(); - if (dirty.polygon_offset) { - ApplyPolygonOffset(); - dirty.polygon_offset = false; - } + ApplyPolygonOffset(); ApplyAlphaTest(); + ApplyClipControl(); } void OpenGLState::EmulateViewportWithScissor() { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index c358d3b38..eaff22bda 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -5,168 +5,150 @@ #pragma once #include <array> +#include <type_traits> #include <glad/glad.h> #include "video_core/engines/maxwell_3d.h" namespace OpenGL { -namespace TextureUnits { - -struct TextureUnit { - GLint id; - constexpr GLenum Enum() const { - return static_cast<GLenum>(GL_TEXTURE0 + id); - } -}; - -constexpr TextureUnit MaxwellTexture(int unit) { - return TextureUnit{unit}; -} - -constexpr TextureUnit LightingLUT{3}; -constexpr TextureUnit FogLUT{4}; -constexpr TextureUnit ProcTexNoiseLUT{5}; -constexpr TextureUnit ProcTexColorMap{6}; -constexpr TextureUnit ProcTexAlphaMap{7}; -constexpr TextureUnit ProcTexLUT{8}; -constexpr TextureUnit ProcTexDiffLUT{9}; - -} // namespace TextureUnits - class OpenGLState { public: struct { - bool enabled; // GL_FRAMEBUFFER_SRGB + bool enabled = false; // GL_FRAMEBUFFER_SRGB } framebuffer_srgb; struct { - bool alpha_to_coverage; // GL_ALPHA_TO_COVERAGE - bool alpha_to_one; // GL_ALPHA_TO_ONE + bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE + bool alpha_to_one = false; // GL_ALPHA_TO_ONE } multisample_control; struct { - bool 
enabled; // GL_CLAMP_FRAGMENT_COLOR_ARB + bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB } fragment_color_clamp; struct { - bool far_plane; - bool near_plane; + bool far_plane = false; + bool near_plane = false; } depth_clamp; // GL_DEPTH_CLAMP struct { - bool enabled; // GL_CULL_FACE - GLenum mode; // GL_CULL_FACE_MODE - GLenum front_face; // GL_FRONT_FACE + bool enabled = false; // GL_CULL_FACE + GLenum mode = GL_BACK; // GL_CULL_FACE_MODE + GLenum front_face = GL_CCW; // GL_FRONT_FACE } cull; struct { - bool test_enabled; // GL_DEPTH_TEST - GLenum test_func; // GL_DEPTH_FUNC - GLboolean write_mask; // GL_DEPTH_WRITEMASK + bool test_enabled = false; // GL_DEPTH_TEST + GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK + GLenum test_func = GL_LESS; // GL_DEPTH_FUNC } depth; struct { - bool enabled; - GLuint index; + bool enabled = false; + GLuint index = 0; } primitive_restart; // GL_PRIMITIVE_RESTART struct ColorMask { - GLboolean red_enabled; - GLboolean green_enabled; - GLboolean blue_enabled; - GLboolean alpha_enabled; + GLboolean red_enabled = GL_TRUE; + GLboolean green_enabled = GL_TRUE; + GLboolean blue_enabled = GL_TRUE; + GLboolean alpha_enabled = GL_TRUE; }; std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_mask; // GL_COLOR_WRITEMASK struct { - bool test_enabled; // GL_STENCIL_TEST + bool test_enabled = false; // GL_STENCIL_TEST struct { - GLenum test_func; // GL_STENCIL_FUNC - GLint test_ref; // GL_STENCIL_REF - GLuint test_mask; // GL_STENCIL_VALUE_MASK - GLuint write_mask; // GL_STENCIL_WRITEMASK - GLenum action_stencil_fail; // GL_STENCIL_FAIL - GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL - GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS + GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC + GLint test_ref = 0; // GL_STENCIL_REF + GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK + GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK + GLenum action_stencil_fail = GL_KEEP; // 
GL_STENCIL_FAIL + GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL + GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS } front, back; } stencil; struct Blend { - bool enabled; // GL_BLEND - GLenum rgb_equation; // GL_BLEND_EQUATION_RGB - GLenum a_equation; // GL_BLEND_EQUATION_ALPHA - GLenum src_rgb_func; // GL_BLEND_SRC_RGB - GLenum dst_rgb_func; // GL_BLEND_DST_RGB - GLenum src_a_func; // GL_BLEND_SRC_ALPHA - GLenum dst_a_func; // GL_BLEND_DST_ALPHA + bool enabled = false; // GL_BLEND + GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB + GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA + GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB + GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB + GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA + GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA }; std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend; struct { - bool enabled; + bool enabled = false; } independant_blend; struct { - GLclampf red; - GLclampf green; - GLclampf blue; - GLclampf alpha; + GLclampf red = 0.0f; + GLclampf green = 0.0f; + GLclampf blue = 0.0f; + GLclampf alpha = 0.0f; } blend_color; // GL_BLEND_COLOR struct { - bool enabled; // GL_LOGIC_OP_MODE - GLenum operation; + bool enabled = false; // GL_LOGIC_OP_MODE + GLenum operation = GL_COPY; } logic_op; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {}; struct { - GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING - GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING - 
GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING - GLuint shader_program; // GL_CURRENT_PROGRAM - GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING + GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING + GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING + GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING + GLuint shader_program = 0; // GL_CURRENT_PROGRAM + GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING } draw; - struct viewport { - GLint x; - GLint y; - GLint width; - GLint height; - GLfloat depth_range_near; // GL_DEPTH_RANGE - GLfloat depth_range_far; // GL_DEPTH_RANGE + struct Viewport { + GLint x = 0; + GLint y = 0; + GLint width = 0; + GLint height = 0; + GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE + GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE struct { - bool enabled; // GL_SCISSOR_TEST - GLint x; - GLint y; - GLsizei width; - GLsizei height; + bool enabled = false; // GL_SCISSOR_TEST + GLint x = 0; + GLint y = 0; + GLsizei width = 0; + GLsizei height = 0; } scissor; }; - std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; + std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; struct { - float size; // GL_POINT_SIZE + float size = 1.0f; // GL_POINT_SIZE } point; struct { - bool point_enable; - bool line_enable; - bool fill_enable; - GLfloat units; - GLfloat factor; - GLfloat clamp; + bool point_enable = false; + bool line_enable = false; + bool fill_enable = false; + GLfloat units = 0.0f; + GLfloat factor = 0.0f; + GLfloat clamp = 0.0f; } polygon_offset; struct { - bool enabled; // GL_ALPHA_TEST - GLenum func; // GL_ALPHA_TEST_FUNC - GLfloat ref; // GL_ALPHA_TEST_REF + bool enabled = false; // GL_ALPHA_TEST + GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC + GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF } alpha_test; - std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE + std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE + + struct { + GLenum origin = 
GL_LOWER_LEFT; + } clip_control; OpenGLState(); @@ -179,34 +161,32 @@ public: /// Apply this state as the current OpenGL state void Apply(); - void ApplyFramebufferState() const; - void ApplyVertexArrayState() const; - void ApplyShaderProgram() const; - void ApplyProgramPipeline() const; - void ApplyClipDistances() const; - void ApplyPointSize() const; - void ApplyFragmentColorClamp() const; - void ApplyMultisample() const; - void ApplySRgb() const; - void ApplyCulling() const; - void ApplyColorMask() const; - void ApplyDepth() const; - void ApplyPrimitiveRestart() const; - void ApplyStencilTest() const; - void ApplyViewport() const; - void ApplyTargetBlending(std::size_t target, bool force) const; - void ApplyGlobalBlending() const; - void ApplyBlending() const; - void ApplyLogicOp() const; - void ApplyTextures() const; - void ApplySamplers() const; - void ApplyImages() const; - void ApplyDepthClamp() const; - void ApplyPolygonOffset() const; - void ApplyAlphaTest() const; - - /// Set the initial OpenGL state - static void ApplyDefaultState(); + void ApplyFramebufferState(); + void ApplyVertexArrayState(); + void ApplyShaderProgram(); + void ApplyProgramPipeline(); + void ApplyClipDistances(); + void ApplyPointSize(); + void ApplyFragmentColorClamp(); + void ApplyMultisample(); + void ApplySRgb(); + void ApplyCulling(); + void ApplyColorMask(); + void ApplyDepth(); + void ApplyPrimitiveRestart(); + void ApplyStencilTest(); + void ApplyViewport(); + void ApplyTargetBlending(std::size_t target, bool force); + void ApplyGlobalBlending(); + void ApplyBlending(); + void ApplyLogicOp(); + void ApplyTextures(); + void ApplySamplers(); + void ApplyImages(); + void ApplyDepthClamp(); + void ApplyPolygonOffset(); + void ApplyAlphaTest(); + void ApplyClipControl(); /// Resets any references to the given resource OpenGLState& UnbindTexture(GLuint handle); @@ -253,5 +233,6 @@ private: bool color_mask; } dirty{}; }; +static_assert(std::is_trivially_copyable_v<OpenGLState>); } 
// namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 173b76c4e..4659e098f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -23,7 +23,6 @@ namespace OpenGL { using Tegra::Texture::SwizzleSource; using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceCompression; using VideoCore::Surface::SurfaceTarget; @@ -40,102 +39,95 @@ struct FormatTuple { GLint internal_format; GLenum format; GLenum type; - ComponentType component_type; bool compressed; }; constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U - {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, - false}, // A2B10G10R10U - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, - false}, // R11FG11FB10F - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI - 
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT45 - {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 - {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXN2UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // BC7U - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, - true}, // BC6H_UF16 - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, - true}, // BC6H_SF16 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 - {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F - {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F - {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F - {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U - {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI - {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI - {GL_RG16I, GL_RG_INTEGER, GL_SHORT, 
ComponentType::SInt, false}, // RG16I - {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S - {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, - false}, // RGBA8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U - {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI - {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBX16F - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U + {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U + {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 + 
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 + {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 + {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16 + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 + {GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F + {GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F + {GL_R32F, GL_RED, GL_FLOAT, false}, // R32F + {GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F + {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U + {GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI + {GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI + {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I + {GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S + {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U + {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI + {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 + {GL_RGBA8, GL_RGBA, 
GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT1_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT23_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT45_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // BC7U_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, 
GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, - false}, // Z16 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16 // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, - false}, // Z24S8 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, - false}, // S8Z24 - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, - ComponentType::Float, false}, // Z32FS8 + {GL_DEPTH24_STENCIL8, 
GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24 + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8 }}; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { +const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; return format; @@ -237,7 +229,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) : VideoCommon::SurfaceBase<View>(gpu_addr, params) { - const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)}; + const auto& tuple{GetFormatTuple(params.pixel_format)}; internal_format = tuple.internal_format; format = tuple.format; type = tuple.type; @@ -439,8 +431,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { texture_view.Create(); const GLuint handle{texture_view.handle}; - const FormatTuple& tuple{ - GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; + const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)}; glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, params.num_levels, params.base_layer, params.num_layers); @@ -550,8 +541,8 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) const auto& dst_params = dst_surface->GetSurfaceParams(); UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); - const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); - const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); + const auto source_format = GetFormatTuple(src_params.pixel_format); + const 
auto dest_format = GetFormatTuple(dst_params.pixel_format); const std::size_t source_size = src_surface->GetHostSizeInBytes(); const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..7646cbb0e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - system.GetPerfStats().EndSystemFrame(); - // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); @@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); - system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); - system.GetPerfStats().BeginSystemFrame(); - // Restore the rasterizer state prev_state.AllDirty(); prev_state.Apply(); @@ -328,10 +323,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, // (e.g. handheld mode) on a 1920x1080 framebuffer. 
f32 scale_u = 1.f, scale_v = 1.f; if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / screen_info.texture.width; + scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / + static_cast<f32>(screen_info.texture.width); } if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / screen_info.texture.height; + scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / + static_cast<f32>(screen_info.texture.height); } std::array<ScreenRectVertex, 4> vertices = {{ diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 3c5acda3e..463ed43ae 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -95,83 +95,82 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar } // namespace Sampler struct FormatTuple { - vk::Format format; ///< Vulkan format - ComponentType component_type; ///< Abstracted component type - bool attachable; ///< True when this format can be used as an attachment + vk::Format format; ///< Vulkan format + bool attachable; ///< True when this format can be used as an attachment }; static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI - {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U - {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U - {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U - {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI - {vk::Format::eUndefined, 
ComponentType::Invalid, false}, // RGBA16F - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI - {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1 - {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23 - {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45 - {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM - {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM - {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U - {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S - 
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F - {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI - {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBX16F - {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4 + {vk::Format::eA8B8G8R8UnormPack32, true}, // ABGR8U + {vk::Format::eUndefined, false}, // ABGR8S + {vk::Format::eUndefined, false}, // ABGR8UI + {vk::Format::eB5G6R5UnormPack16, false}, // B5G6R5U + {vk::Format::eA2B10G10R10UnormPack32, true}, // A2B10G10R10U + {vk::Format::eUndefined, false}, // A1B5G5R5U + {vk::Format::eR8Unorm, true}, // R8U + {vk::Format::eUndefined, false}, // R8UI + {vk::Format::eUndefined, false}, // RGBA16F + {vk::Format::eUndefined, false}, // RGBA16U + {vk::Format::eUndefined, false}, // RGBA16UI + {vk::Format::eUndefined, false}, // R11FG11FB10F + {vk::Format::eUndefined, false}, // RGBA32UI + {vk::Format::eBc1RgbaUnormBlock, false}, // DXT1 + {vk::Format::eBc2UnormBlock, false}, // DXT23 + {vk::Format::eBc3UnormBlock, false}, // DXT45 + {vk::Format::eBc4UnormBlock, false}, // DXN1 + {vk::Format::eUndefined, false}, // DXN2UNORM + {vk::Format::eUndefined, false}, // DXN2SNORM + {vk::Format::eUndefined, false}, // BC7U + {vk::Format::eUndefined, false}, // BC6H_UF16 + {vk::Format::eUndefined, false}, // BC6H_SF16 + {vk::Format::eUndefined, false}, // ASTC_2D_4X4 + {vk::Format::eUndefined, false}, // BGRA8 + {vk::Format::eUndefined, false}, // RGBA32F + {vk::Format::eUndefined, false}, // RG32F + {vk::Format::eUndefined, false}, // R32F + 
{vk::Format::eUndefined, false}, // R16F + {vk::Format::eUndefined, false}, // R16U + {vk::Format::eUndefined, false}, // R16S + {vk::Format::eUndefined, false}, // R16UI + {vk::Format::eUndefined, false}, // R16I + {vk::Format::eUndefined, false}, // RG16 + {vk::Format::eUndefined, false}, // RG16F + {vk::Format::eUndefined, false}, // RG16UI + {vk::Format::eUndefined, false}, // RG16I + {vk::Format::eUndefined, false}, // RG16S + {vk::Format::eUndefined, false}, // RGB32F + {vk::Format::eA8B8G8R8SrgbPack32, true}, // RGBA8_SRGB + {vk::Format::eUndefined, false}, // RG8U + {vk::Format::eUndefined, false}, // RG8S + {vk::Format::eUndefined, false}, // RG32UI + {vk::Format::eUndefined, false}, // RGBX16F + {vk::Format::eUndefined, false}, // R32UI + {vk::Format::eUndefined, false}, // ASTC_2D_8X8 + {vk::Format::eUndefined, false}, // ASTC_2D_8X5 + {vk::Format::eUndefined, false}, // ASTC_2D_5X4 // Compressed sRGB formats - {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8 - {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB + {vk::Format::eUndefined, false}, // BGRA8_SRGB + {vk::Format::eUndefined, 
false}, // DXT1_SRGB + {vk::Format::eUndefined, false}, // DXT23_SRGB + {vk::Format::eUndefined, false}, // DXT45_SRGB + {vk::Format::eUndefined, false}, // BC7U_SRGB + {vk::Format::eUndefined, false}, // ASTC_2D_4X4_SRGB + {vk::Format::eUndefined, false}, // ASTC_2D_8X8_SRGB + {vk::Format::eUndefined, false}, // ASTC_2D_8X5_SRGB + {vk::Format::eUndefined, false}, // ASTC_2D_5X4_SRGB + {vk::Format::eUndefined, false}, // ASTC_2D_5X5 + {vk::Format::eUndefined, false}, // ASTC_2D_5X5_SRGB + {vk::Format::eUndefined, false}, // ASTC_2D_10X8 + {vk::Format::eUndefined, false}, // ASTC_2D_10X8_SRGB // Depth formats - {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F - {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16 + {vk::Format::eD32Sfloat, true}, // Z32F + {vk::Format::eD16Unorm, true}, // Z16 // DepthStencil formats - {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8 - {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated) - {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8 + {vk::Format::eD24UnormS8Uint, true}, // Z24S8 + {vk::Format::eD24UnormS8Uint, true}, // S8Z24 (emulated) + {vk::Format::eUndefined, false}, // Z32FS8 }}; static constexpr bool IsZetaFormat(PixelFormat pixel_format) { @@ -180,14 +179,13 @@ static constexpr bool IsZetaFormat(PixelFormat pixel_format) { } std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, - PixelFormat pixel_format, ComponentType component_type) { + PixelFormat pixel_format) { ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)]; UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined, - "Unimplemented texture format with pixel format={} and component type={}", - static_cast<u32>(pixel_format), static_cast<u32>(component_type)); - ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch"); + 
"Unimplemented texture format with pixel format={}", + static_cast<u32>(pixel_format)); auto usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 4cadc0721..5b0ffd87a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -16,7 +16,6 @@ namespace Vulkan::MaxwellToVK { using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PixelFormat = VideoCore::Surface::PixelFormat; -using ComponentType = VideoCore::Surface::ComponentType; namespace Sampler { @@ -31,7 +30,7 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar } // namespace Sampler std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, - PixelFormat pixel_format, ComponentType component_type); + PixelFormat pixel_format); vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 77fc58f25..2850d5b59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -88,6 +88,9 @@ bool IsPrecise(Operation operand) { } // namespace +class ASTDecompiler; +class ExprDecompiler; + class SPIRVDecompiler : public Sirit::Module { public: explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) @@ -97,27 +100,7 @@ public: AddExtension("SPV_KHR_variable_pointers"); } - void Decompile() { - AllocateBindings(); - AllocateLabels(); - - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareRegisters(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareConstantBuffers(); - 
DeclareGlobalBuffers(); - DeclareSamplers(); - - execute_function = - Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void))); - Emit(OpLabel()); - + void DecompileBranchMode() { const u32 first_address = ir.GetBasicBlocks().begin()->first; const Id loop_label = OpLabel("loop"); const Id merge_label = OpLabel("merge"); @@ -174,6 +157,43 @@ public: Emit(continue_label); Emit(OpBranch(loop_label)); Emit(merge_label); + } + + void DecompileAST(); + + void Decompile() { + const bool is_fully_decompiled = ir.IsDecompiled(); + AllocateBindings(); + if (!is_fully_decompiled) { + AllocateLabels(); + } + + DeclareVertex(); + DeclareGeometry(); + DeclareFragment(); + DeclareRegisters(); + DeclarePredicates(); + if (is_fully_decompiled) { + DeclareFlowVariables(); + } + DeclareLocalMemory(); + DeclareInternalFlags(); + DeclareInputAttributes(); + DeclareOutputAttributes(); + DeclareConstantBuffers(); + DeclareGlobalBuffers(); + DeclareSamplers(); + + execute_function = + Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void))); + Emit(OpLabel()); + + if (is_fully_decompiled) { + DecompileAST(); + } else { + DecompileBranchMode(); + } + Emit(OpReturn()); Emit(OpFunctionEnd()); } @@ -206,6 +226,9 @@ public: } private: + friend class ASTDecompiler; + friend class ExprDecompiler; + static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); void AllocateBindings() { @@ -294,6 +317,14 @@ private: } } + void DeclareFlowVariables() { + for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { + const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); + Name(id, fmt::format("flow_var_{}", static_cast<u32>(i))); + flow_variables.emplace(i, AddGlobalVariable(id)); + } + } + void DeclareLocalMemory() { if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { const auto element_count = static_cast<u32>(Common::AlignUp(local_memory_size, 4) / 4); @@ -615,9 +646,15 
@@ private: Emit(OpBranchConditional(condition, true_label, skip_label)); Emit(true_label); + ++conditional_nest_count; VisitBasicBlock(conditional->GetCode()); + --conditional_nest_count; - Emit(OpBranch(skip_label)); + if (inside_branch == 0) { + Emit(OpBranch(skip_label)); + } else { + inside_branch--; + } Emit(skip_label); return {}; @@ -746,6 +783,11 @@ private: return {}; } + Id FSwizzleAdd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HNegate(Operation operation) { UNIMPLEMENTED(); return {}; @@ -980,7 +1022,11 @@ private: UNIMPLEMENTED_IF(!target); Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue()))); - BranchingOp([&]() { Emit(OpBranch(continue_label)); }); + Emit(OpBranch(continue_label)); + inside_branch = conditional_nest_count; + if (conditional_nest_count == 0) { + Emit(OpLabel()); + } return {}; } @@ -988,7 +1034,11 @@ private: const Id op_a = VisitOperand<Type::Uint>(operation, 0); Emit(OpStore(jmp_to, op_a)); - BranchingOp([&]() { Emit(OpBranch(continue_label)); }); + Emit(OpBranch(continue_label)); + inside_branch = conditional_nest_count; + if (conditional_nest_count == 0) { + Emit(OpLabel()); + } return {}; } @@ -1015,11 +1065,15 @@ private: Emit(OpStore(flow_stack_top, previous)); Emit(OpStore(jmp_to, target)); - BranchingOp([&]() { Emit(OpBranch(continue_label)); }); + Emit(OpBranch(continue_label)); + inside_branch = conditional_nest_count; + if (conditional_nest_count == 0) { + Emit(OpLabel()); + } return {}; } - Id Exit(Operation operation) { + Id PreExit() { switch (stage) { case ShaderStage::Vertex: { // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't @@ -1067,12 +1121,35 @@ private: } } - BranchingOp([&]() { Emit(OpReturn()); }); + return {}; + } + + Id Exit(Operation operation) { + PreExit(); + inside_branch = conditional_nest_count; + if (conditional_nest_count > 0) { + Emit(OpReturn()); + } else { + const Id dummy = OpLabel(); + Emit(OpBranch(dummy)); + Emit(dummy); + 
Emit(OpReturn()); + Emit(OpLabel()); + } return {}; } Id Discard(Operation operation) { - BranchingOp([&]() { Emit(OpKill()); }); + inside_branch = conditional_nest_count; + if (conditional_nest_count > 0) { + Emit(OpKill()); + } else { + const Id dummy = OpLabel(); + Emit(OpBranch(dummy)); + Emit(dummy); + Emit(OpKill()); + Emit(OpLabel()); + } return {}; } @@ -1123,42 +1200,12 @@ private: return {}; } - Id ShuffleIndexed(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleUp(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleDown(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleButterfly(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleIndexed(Operation) { + Id ThreadId(Operation) { UNIMPLEMENTED(); return {}; } - Id InRangeShuffleUp(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleDown(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleButterfly(Operation) { + Id ShuffleIndexed(Operation) { UNIMPLEMENTED(); return {}; } @@ -1267,17 +1314,6 @@ private: return {}; } - void BranchingOp(std::function<void()> call) { - const Id true_label = OpLabel(); - const Id skip_label = OpLabel(); - Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::Flatten)); - Emit(OpBranchConditional(v_true, true_label, skip_label, 1, 0)); - Emit(true_label); - call(); - - Emit(skip_label); - } - std::tuple<Id, Id> CreateFlowStack() { // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely // that shaders will use 20 nested SSYs and PBKs. 
@@ -1332,6 +1368,7 @@ private: &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, + &SPIRVDecompiler::FSwizzleAdd, &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, @@ -1467,15 +1504,8 @@ private: &SPIRVDecompiler::VoteAny, &SPIRVDecompiler::VoteEqual, + &SPIRVDecompiler::ThreadId, &SPIRVDecompiler::ShuffleIndexed, - &SPIRVDecompiler::ShuffleUp, - &SPIRVDecompiler::ShuffleDown, - &SPIRVDecompiler::ShuffleButterfly, - - &SPIRVDecompiler::InRangeShuffleIndexed, - &SPIRVDecompiler::InRangeShuffleUp, - &SPIRVDecompiler::InRangeShuffleDown, - &SPIRVDecompiler::InRangeShuffleButterfly, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); @@ -1483,6 +1513,8 @@ private: const ShaderIR& ir; const ShaderStage stage; const Tegra::Shader::Header header; + u64 conditional_nest_count{}; + u64 inside_branch{}; const Id t_void = Name(TypeVoid(), "void"); @@ -1545,6 +1577,7 @@ private: Id per_vertex{}; std::map<u32, Id> registers; std::map<Tegra::Shader::Pred, Id> predicates; + std::map<u32, Id> flow_variables; Id local_memory{}; std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; std::map<Attribute::Index, Id> input_attributes; @@ -1580,6 +1613,235 @@ private: std::map<u32, Id> labels; }; +class ExprDecompiler { +public: + explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} + + Id operator()(const ExprAnd& expr) { + const Id type_def = decomp.GetTypeDefinition(Type::Bool); + const Id op1 = Visit(expr.operand1); + const Id op2 = Visit(expr.operand2); + return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2)); + } + + Id operator()(const ExprOr& expr) { + const Id type_def = decomp.GetTypeDefinition(Type::Bool); + const Id op1 = Visit(expr.operand1); + const Id op2 = Visit(expr.operand2); + return 
decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2)); + } + + Id operator()(const ExprNot& expr) { + const Id type_def = decomp.GetTypeDefinition(Type::Bool); + const Id op1 = Visit(expr.operand1); + return decomp.Emit(decomp.OpLogicalNot(type_def, op1)); + } + + Id operator()(const ExprPredicate& expr) { + const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); + return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred))); + } + + Id operator()(const ExprCondCode& expr) { + const Node cc = decomp.ir.GetConditionCode(expr.cc); + Id target; + + if (const auto pred = std::get_if<PredicateNode>(&*cc)) { + const auto index = pred->GetIndex(); + switch (index) { + case Tegra::Shader::Pred::NeverExecute: + target = decomp.v_false; + break; + case Tegra::Shader::Pred::UnusedIndex: + target = decomp.v_true; + break; + default: + target = decomp.predicates.at(index); + break; + } + } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { + target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag())); + } + return decomp.Emit(decomp.OpLoad(decomp.t_bool, target)); + } + + Id operator()(const ExprVar& expr) { + return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index))); + } + + Id operator()(const ExprBoolean& expr) { + return expr.value ? 
decomp.v_true : decomp.v_false; + } + + Id operator()(const ExprGprEqual& expr) { + const Id target = decomp.Constant(decomp.t_uint, expr.value); + const Id gpr = decomp.BitcastTo<Type::Uint>( + decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)))); + return decomp.Emit(decomp.OpLogicalEqual(decomp.t_uint, gpr, target)); + } + + Id Visit(const Expr& node) { + return std::visit(*this, *node); + } + +private: + SPIRVDecompiler& decomp; +}; + +class ASTDecompiler { +public: + explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} + + void operator()(const ASTProgram& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(const ASTIfThen& ast) { + ExprDecompiler expr_parser{decomp}; + const Id condition = expr_parser.Visit(ast.condition); + const Id then_label = decomp.OpLabel(); + const Id endif_label = decomp.OpLabel(); + decomp.Emit(decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone)); + decomp.Emit(decomp.OpBranchConditional(condition, then_label, endif_label)); + decomp.Emit(then_label); + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + decomp.Emit(decomp.OpBranch(endif_label)); + decomp.Emit(endif_label); + } + + void operator()([[maybe_unused]] const ASTIfElse& ast) { + UNREACHABLE(); + } + + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { + UNREACHABLE(); + } + + void operator()(const ASTBlockDecoded& ast) { + decomp.VisitBasicBlock(ast.nodes); + } + + void operator()(const ASTVarSet& ast) { + ExprDecompiler expr_parser{decomp}; + const Id condition = expr_parser.Visit(ast.condition); + decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition)); + } + + void operator()([[maybe_unused]] const ASTLabel& ast) { + // Do nothing + } + + void operator()([[maybe_unused]] const ASTGoto& ast) { + UNREACHABLE(); + } + + void 
operator()(const ASTDoWhile& ast) { + const Id loop_label = decomp.OpLabel(); + const Id endloop_label = decomp.OpLabel(); + const Id loop_start_block = decomp.OpLabel(); + const Id loop_end_block = decomp.OpLabel(); + current_loop_exit = endloop_label; + decomp.Emit(decomp.OpBranch(loop_label)); + decomp.Emit(loop_label); + decomp.Emit( + decomp.OpLoopMerge(endloop_label, loop_end_block, spv::LoopControlMask::MaskNone)); + decomp.Emit(decomp.OpBranch(loop_start_block)); + decomp.Emit(loop_start_block); + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + ExprDecompiler expr_parser{decomp}; + const Id condition = expr_parser.Visit(ast.condition); + decomp.Emit(decomp.OpBranchConditional(condition, loop_label, endloop_label)); + decomp.Emit(endloop_label); + } + + void operator()(const ASTReturn& ast) { + if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { + ExprDecompiler expr_parser{decomp}; + const Id condition = expr_parser.Visit(ast.condition); + const Id then_label = decomp.OpLabel(); + const Id endif_label = decomp.OpLabel(); + decomp.Emit(decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone)); + decomp.Emit(decomp.OpBranchConditional(condition, then_label, endif_label)); + decomp.Emit(then_label); + if (ast.kills) { + decomp.Emit(decomp.OpKill()); + } else { + decomp.PreExit(); + decomp.Emit(decomp.OpReturn()); + } + decomp.Emit(endif_label); + } else { + const Id next_block = decomp.OpLabel(); + decomp.Emit(decomp.OpBranch(next_block)); + decomp.Emit(next_block); + if (ast.kills) { + decomp.Emit(decomp.OpKill()); + } else { + decomp.PreExit(); + decomp.Emit(decomp.OpReturn()); + } + decomp.Emit(decomp.OpLabel()); + } + } + + void operator()(const ASTBreak& ast) { + if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { + ExprDecompiler expr_parser{decomp}; + const Id condition = expr_parser.Visit(ast.condition); + const Id then_label = decomp.OpLabel(); + const Id 
endif_label = decomp.OpLabel(); + decomp.Emit(decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone)); + decomp.Emit(decomp.OpBranchConditional(condition, then_label, endif_label)); + decomp.Emit(then_label); + decomp.Emit(decomp.OpBranch(current_loop_exit)); + decomp.Emit(endif_label); + } else { + const Id next_block = decomp.OpLabel(); + decomp.Emit(decomp.OpBranch(next_block)); + decomp.Emit(next_block); + decomp.Emit(decomp.OpBranch(current_loop_exit)); + decomp.Emit(decomp.OpLabel()); + } + } + + void Visit(const ASTNode& node) { + std::visit(*this, *node->GetInnerData()); + } + +private: + SPIRVDecompiler& decomp; + Id current_loop_exit{}; +}; + +void SPIRVDecompiler::DecompileAST() { + const u32 num_flow_variables = ir.GetASTNumVariables(); + for (u32 i = 0; i < num_flow_variables; i++) { + const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); + Name(id, fmt::format("flow_var_{}", i)); + flow_variables.emplace(i, AddGlobalVariable(id)); + } + + const ASTNode program = ir.GetASTProgram(); + ASTDecompiler decompiler{*this}; + decompiler.Visit(program); + + const Id next_block = OpLabel(); + Emit(OpBranch(next_block)); + Emit(next_block); +} + DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp new file mode 100644 index 000000000..3f96d9076 --- /dev/null +++ b/src/video_core/shader/ast.cpp @@ -0,0 +1,753 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string> +#include <string_view> + +#include <fmt/format.h> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/shader/ast.h" +#include "video_core/shader/expr.h" + +namespace VideoCommon::Shader { + +ASTZipper::ASTZipper() = default; + +void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { + ASSERT(new_first->manager == nullptr); + first = new_first; + last = new_first; + + ASTNode current = first; + while (current) { + current->manager = this; + current->parent = parent; + last = current; + current = current->next; + } +} + +void ASTZipper::PushBack(const ASTNode new_node) { + ASSERT(new_node->manager == nullptr); + new_node->previous = last; + if (last) { + last->next = new_node; + } + new_node->next.reset(); + last = new_node; + if (!first) { + first = new_node; + } + new_node->manager = this; +} + +void ASTZipper::PushFront(const ASTNode new_node) { + ASSERT(new_node->manager == nullptr); + new_node->previous.reset(); + new_node->next = first; + if (first) { + first->previous = new_node; + } + if (last == first) { + last = new_node; + } + first = new_node; + new_node->manager = this; +} + +void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { + ASSERT(new_node->manager == nullptr); + if (!at_node) { + PushFront(new_node); + return; + } + const ASTNode next = at_node->next; + if (next) { + next->previous = new_node; + } + new_node->previous = at_node; + if (at_node == last) { + last = new_node; + } + new_node->next = next; + at_node->next = new_node; + new_node->manager = this; +} + +void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { + ASSERT(new_node->manager == nullptr); + if (!at_node) { + PushBack(new_node); + return; + } + const ASTNode previous = at_node->previous; + if (previous) { + previous->next = new_node; + } + new_node->next = at_node; + if (at_node == first) { + first = new_node; + } + new_node->previous = previous; + at_node->previous = 
new_node; + new_node->manager = this; +} + +void ASTZipper::DetachTail(ASTNode node) { + ASSERT(node->manager == this); + if (node == first) { + first.reset(); + last.reset(); + return; + } + + last = node->previous; + last->next.reset(); + node->previous.reset(); + + ASTNode current = std::move(node); + while (current) { + current->manager = nullptr; + current->parent.reset(); + current = current->next; + } +} + +void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { + ASSERT(start->manager == this && end->manager == this); + if (start == end) { + DetachSingle(start); + return; + } + const ASTNode prev = start->previous; + const ASTNode post = end->next; + if (!prev) { + first = post; + } else { + prev->next = post; + } + if (!post) { + last = prev; + } else { + post->previous = prev; + } + start->previous.reset(); + end->next.reset(); + ASTNode current = start; + bool found = false; + while (current) { + current->manager = nullptr; + current->parent.reset(); + found |= current == end; + current = current->next; + } + ASSERT(found); +} + +void ASTZipper::DetachSingle(const ASTNode node) { + ASSERT(node->manager == this); + const ASTNode prev = node->previous; + const ASTNode post = node->next; + node->previous.reset(); + node->next.reset(); + if (!prev) { + first = post; + } else { + prev->next = post; + } + if (!post) { + last = prev; + } else { + post->previous = prev; + } + + node->manager = nullptr; + node->parent.reset(); +} + +void ASTZipper::Remove(const ASTNode node) { + ASSERT(node->manager == this); + const ASTNode next = node->next; + const ASTNode previous = node->previous; + if (previous) { + previous->next = next; + } + if (next) { + next->previous = previous; + } + node->parent.reset(); + node->manager = nullptr; + if (node == last) { + last = previous; + } + if (node == first) { + first = next; + } +} + +class ExprPrinter final { +public: + void operator()(const ExprAnd& expr) { + inner += "( "; + std::visit(*this, *expr.operand1); 
+ inner += " && "; + std::visit(*this, *expr.operand2); + inner += ')'; + } + + void operator()(const ExprOr& expr) { + inner += "( "; + std::visit(*this, *expr.operand1); + inner += " || "; + std::visit(*this, *expr.operand2); + inner += ')'; + } + + void operator()(const ExprNot& expr) { + inner += "!"; + std::visit(*this, *expr.operand1); + } + + void operator()(const ExprPredicate& expr) { + inner += "P" + std::to_string(expr.predicate); + } + + void operator()(const ExprCondCode& expr) { + u32 cc = static_cast<u32>(expr.cc); + inner += "CC" + std::to_string(cc); + } + + void operator()(const ExprVar& expr) { + inner += "V" + std::to_string(expr.var_index); + } + + void operator()(const ExprBoolean& expr) { + inner += expr.value ? "true" : "false"; + } + + void operator()(const ExprGprEqual& expr) { + inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; + } + + const std::string& GetResult() const { + return inner; + } + +private: + std::string inner; +}; + +class ASTPrinter { +public: + void operator()(const ASTProgram& ast) { + scope++; + inner += "program {\n"; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + inner += "}\n"; + scope--; + } + + void operator()(const ASTIfThen& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); + scope++; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + scope--; + inner += fmt::format("{}}}\n", Indent()); + } + + void operator()(const ASTIfElse& ast) { + inner += Indent(); + inner += "else {\n"; + + scope++; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + scope--; + + inner += Indent(); + inner += "}\n"; + } + + void operator()(const ASTBlockEncoded& ast) { + inner += 
fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); + } + + void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { + inner += Indent(); + inner += "Block;\n"; + } + + void operator()(const ASTVarSet& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); + } + + void operator()(const ASTLabel& ast) { + inner += fmt::format("Label_{}:\n", ast.index); + } + + void operator()(const ASTGoto& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += + fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); + } + + void operator()(const ASTDoWhile& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}do {{\n", Indent()); + scope++; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + scope--; + inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); + } + + void operator()(const ASTReturn& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), + ast.kills ? "discard" : "exit"); + } + + void operator()(const ASTBreak& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); + } + + void Visit(const ASTNode& node) { + std::visit(*this, *node->GetInnerData()); + } + + const std::string& GetResult() const { + return inner; + } + +private: + std::string_view Indent() { + if (space_segment_scope == scope) { + return space_segment; + } + + // Ensure that we don't exceed our view. 
+ ASSERT(scope * 2 < spaces.size()); + + space_segment = spaces.substr(0, scope * 2); + space_segment_scope = scope; + return space_segment; + } + + std::string inner{}; + std::string_view space_segment; + + u32 scope{}; + u32 space_segment_scope{}; + + static constexpr std::string_view spaces{" "}; +}; + +std::string ASTManager::Print() const { + ASTPrinter printer{}; + printer.Visit(main_node); + return printer.GetResult(); +} + +ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation) + : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {}; + +ASTManager::~ASTManager() { + Clear(); +} + +void ASTManager::Init() { + main_node = ASTBase::Make<ASTProgram>(ASTNode{}); + program = std::get_if<ASTProgram>(main_node->GetInnerData()); + false_condition = MakeExpr<ExprBoolean>(false); +} + +void ASTManager::DeclareLabel(u32 address) { + const auto pair = labels_map.emplace(address, labels_count); + if (pair.second) { + labels_count++; + labels.resize(labels_count); + } +} + +void ASTManager::InsertLabel(u32 address) { + const u32 index = labels_map[address]; + const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index); + labels[index] = label; + program->nodes.PushBack(label); +} + +void ASTManager::InsertGoto(Expr condition, u32 address) { + const u32 index = labels_map[address]; + const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index); + gotos.push_back(goto_node); + program->nodes.PushBack(goto_node); +} + +void ASTManager::InsertBlock(u32 start_address, u32 end_address) { + ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address); + program->nodes.PushBack(std::move(block)); +} + +void ASTManager::InsertReturn(Expr condition, bool kills) { + ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills); + program->nodes.PushBack(std::move(node)); +} + +// The decompile algorithm is based on +// "Taming control flow: A structured 
approach to eliminating goto statements" +// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be +// on the same structured level as the label which they jump to. This is done, +// through outward/inward movements and lifting. Once they are at the same +// level, you can enclose them in an "if" structure or a "do-while" structure. +void ASTManager::Decompile() { + auto it = gotos.begin(); + while (it != gotos.end()) { + const ASTNode goto_node = *it; + const auto label_index = goto_node->GetGotoLabel(); + if (!label_index) { + return; + } + const ASTNode label = labels[*label_index]; + if (!full_decompile) { + // We only decompile backward jumps + if (!IsBackwardsJump(goto_node, label)) { + it++; + continue; + } + } + if (IndirectlyRelated(goto_node, label)) { + while (!DirectlyRelated(goto_node, label)) { + MoveOutward(goto_node); + } + } + if (DirectlyRelated(goto_node, label)) { + u32 goto_level = goto_node->GetLevel(); + const u32 label_level = label->GetLevel(); + while (label_level < goto_level) { + MoveOutward(goto_node); + goto_level--; + } + // TODO(Blinkhawk): Implement Lifting and Inward Movements + } + if (label->GetParent() == goto_node->GetParent()) { + bool is_loop = false; + ASTNode current = goto_node->GetPrevious(); + while (current) { + if (current == label) { + is_loop = true; + break; + } + current = current->GetPrevious(); + } + + if (is_loop) { + EncloseDoWhile(goto_node, label); + } else { + EncloseIfThen(goto_node, label); + } + it = gotos.erase(it); + continue; + } + it++; + } + if (full_decompile) { + for (const ASTNode& label : labels) { + auto& manager = label->GetManager(); + manager.Remove(label); + } + labels.clear(); + } else { + auto label_it = labels.begin(); + while (label_it != labels.end()) { + bool can_remove = true; + ASTNode label = *label_it; + for (const ASTNode& goto_node : gotos) { + const auto label_index = goto_node->GetGotoLabel(); + if (!label_index) { + return; + } + ASTNode& glabel = 
labels[*label_index]; + if (glabel == label) { + can_remove = false; + break; + } + } + if (can_remove) { + label->MarkLabelUnused(); + } + } + } +} + +bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { + u32 goto_level = goto_node->GetLevel(); + u32 label_level = label_node->GetLevel(); + while (goto_level > label_level) { + goto_level--; + goto_node = goto_node->GetParent(); + } + while (label_level > goto_level) { + label_level--; + label_node = label_node->GetParent(); + } + while (goto_node->GetParent() != label_node->GetParent()) { + goto_node = goto_node->GetParent(); + label_node = label_node->GetParent(); + } + ASTNode current = goto_node->GetPrevious(); + while (current) { + if (current == label_node) { + return true; + } + current = current->GetPrevious(); + } + return false; +} + +bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { + return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); +} + +bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { + if (first->GetParent() == second->GetParent()) { + return false; + } + const u32 first_level = first->GetLevel(); + const u32 second_level = second->GetLevel(); + u32 min_level; + u32 max_level; + ASTNode max; + ASTNode min; + if (first_level > second_level) { + min_level = second_level; + min = second; + max_level = first_level; + max = first; + } else { + min_level = first_level; + min = first; + max_level = second_level; + max = second; + } + + while (max_level > min_level) { + max_level--; + max = max->GetParent(); + } + + return min->GetParent() == max->GetParent(); +} + +void ASTManager::ShowCurrentState(std::string_view state) const { + LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); + SanityCheck(); +} + +void ASTManager::SanityCheck() const { + for (const auto& label : labels) { + if (!label->GetParent()) { + LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); + } + } 
+} + +void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { + ASTZipper& zipper = goto_node->GetManager(); + const ASTNode loop_start = label->GetNext(); + if (loop_start == goto_node) { + zipper.Remove(goto_node); + return; + } + const ASTNode parent = label->GetParent(); + const Expr condition = goto_node->GetGotoCondition(); + zipper.DetachSegment(loop_start, goto_node); + const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition); + ASTZipper* sub_zipper = do_while_node->GetSubNodes(); + sub_zipper->Init(loop_start, do_while_node); + zipper.InsertAfter(do_while_node, label); + sub_zipper->Remove(goto_node); +} + +void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { + ASTZipper& zipper = goto_node->GetManager(); + const ASTNode if_end = label->GetPrevious(); + if (if_end == goto_node) { + zipper.Remove(goto_node); + return; + } + const ASTNode prev = goto_node->GetPrevious(); + const Expr condition = goto_node->GetGotoCondition(); + bool do_else = false; + if (!disable_else_derivation && prev->IsIfThen()) { + const Expr if_condition = prev->GetIfCondition(); + do_else = ExprAreEqual(if_condition, condition); + } + const ASTNode parent = label->GetParent(); + zipper.DetachSegment(goto_node, if_end); + ASTNode if_node; + if (do_else) { + if_node = ASTBase::Make<ASTIfElse>(parent); + } else { + Expr neg_condition = MakeExprNot(condition); + if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition); + } + ASTZipper* sub_zipper = if_node->GetSubNodes(); + sub_zipper->Init(goto_node, if_node); + zipper.InsertAfter(if_node, prev); + sub_zipper->Remove(goto_node); +} + +void ASTManager::MoveOutward(ASTNode goto_node) { + ASTZipper& zipper = goto_node->GetManager(); + const ASTNode parent = goto_node->GetParent(); + ASTZipper& zipper2 = parent->GetManager(); + const ASTNode grandpa = parent->GetParent(); + const bool is_loop = parent->IsLoop(); + const bool is_else = parent->IsIfElse(); + const bool is_if = parent->IsIfThen(); 
+ + const ASTNode prev = goto_node->GetPrevious(); + const ASTNode post = goto_node->GetNext(); + + const Expr condition = goto_node->GetGotoCondition(); + zipper.DetachSingle(goto_node); + if (is_loop) { + const u32 var_index = NewVariable(); + const Expr var_condition = MakeExpr<ExprVar>(var_index); + const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); + const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); + zipper2.InsertBefore(var_node_init, parent); + zipper.InsertAfter(var_node, prev); + goto_node->SetGotoCondition(var_condition); + const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition); + zipper.InsertAfter(break_node, var_node); + } else if (is_if || is_else) { + const u32 var_index = NewVariable(); + const Expr var_condition = MakeExpr<ExprVar>(var_index); + const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); + const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); + if (is_if) { + zipper2.InsertBefore(var_node_init, parent); + } else { + zipper2.InsertBefore(var_node_init, parent->GetPrevious()); + } + zipper.InsertAfter(var_node, prev); + goto_node->SetGotoCondition(var_condition); + if (post) { + zipper.DetachTail(post); + const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition)); + ASTZipper* sub_zipper = if_node->GetSubNodes(); + sub_zipper->Init(post, if_node); + zipper.InsertAfter(if_node, var_node); + } + } else { + UNREACHABLE(); + } + const ASTNode next = parent->GetNext(); + if (is_if && next && next->IsIfElse()) { + zipper2.InsertAfter(goto_node, next); + goto_node->SetParent(grandpa); + return; + } + zipper2.InsertAfter(goto_node, parent); + goto_node->SetParent(grandpa); +} + +class ASTClearer { +public: + ASTClearer() = default; + + void operator()(const ASTProgram& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = 
current->GetNext(); + } + } + + void operator()(const ASTIfThen& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(const ASTIfElse& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} + + void operator()(ASTBlockDecoded& ast) { + ast.nodes.clear(); + } + + void operator()([[maybe_unused]] const ASTVarSet& ast) {} + + void operator()([[maybe_unused]] const ASTLabel& ast) {} + + void operator()([[maybe_unused]] const ASTGoto& ast) {} + + void operator()(const ASTDoWhile& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()([[maybe_unused]] const ASTReturn& ast) {} + + void operator()([[maybe_unused]] const ASTBreak& ast) {} + + void Visit(const ASTNode& node) { + std::visit(*this, *node->GetInnerData()); + node->Clear(); + } +}; + +void ASTManager::Clear() { + if (!main_node) { + return; + } + ASTClearer clearer{}; + clearer.Visit(main_node); + main_node.reset(); + program = nullptr; + labels_map.clear(); + labels.clear(); + gotos.clear(); +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h new file mode 100644 index 000000000..a2f0044ba --- /dev/null +++ b/src/video_core/shader/ast.h @@ -0,0 +1,400 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include <functional>
+#include <list>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include "video_core/shader/expr.h"
+#include "video_core/shader/node.h"
+
+namespace VideoCommon::Shader {
+
+class ASTBase;
+class ASTBlockDecoded;
+class ASTBlockEncoded;
+class ASTBreak;
+class ASTDoWhile;
+class ASTGoto;
+class ASTIfElse;
+class ASTIfThen;
+class ASTLabel;
+class ASTProgram;
+class ASTReturn;
+class ASTVarSet;
+
+/// Payload of an AST node: exactly one of the node kinds declared below.
+using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
+                             ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
+
+/// Shared handle to an AST node.
+using ASTNode = std::shared_ptr<ASTBase>;
+
+/// NOTE(review): presumably names the node kinds that carry a child list; it is not referenced
+/// anywhere in this header - confirm it is still needed.
+enum class ASTZipperType : u32 {
+    Program,
+    IfThen,
+    IfElse,
+    Loop,
+};
+
+/// Ordered list of sibling nodes. Only the two ends are stored here; the per-node links
+/// (next/previous/manager) are private members of ASTBase, which befriends this class.
+class ASTZipper final {
+public:
+    explicit ASTZipper();
+
+    /// Implemented in ast.cpp. The callers visible in EncloseDoWhile/EncloseIfThen pass the first
+    /// node of a previously detached segment together with the node that becomes its parent.
+    void Init(ASTNode first, ASTNode parent);
+
+    ASTNode GetFirst() const {
+        return first;
+    }
+
+    ASTNode GetLast() const {
+        return last;
+    }
+
+    // List editing operations, implemented in ast.cpp.
+    void PushBack(ASTNode new_node);
+    void PushFront(ASTNode new_node);
+    void InsertAfter(ASTNode new_node, ASTNode at_node);
+    void InsertBefore(ASTNode new_node, ASTNode at_node);
+    void DetachTail(ASTNode node);
+    void DetachSingle(ASTNode node);
+    void DetachSegment(ASTNode start, ASTNode end);
+    void Remove(ASTNode node);
+
+    ASTNode first{};
+    ASTNode last{};
+};
+
+/// Plain statement list; ASTManager keeps a pointer to one of these as `program`.
+class ASTProgram {
+public:
+    ASTZipper nodes{};
+};
+
+/// Statement list guarded by `condition`.
+class ASTIfThen {
+public:
+    explicit ASTIfThen(Expr condition) : condition{std::move(condition)} {}
+    Expr condition;
+    ASTZipper nodes{};
+};
+
+/// Statement list without a condition of its own. ASTManager::EncloseIfThen creates it when the
+/// goto condition equals the condition of the if-then node placed right before the goto.
+class ASTIfElse {
+public:
+    ASTZipper nodes{};
+};
+
+/// Block that has not been decoded yet, described by a start and an end value
+/// (addresses, going by ASTManager::InsertBlock).
+class ASTBlockEncoded {
+public:
+    explicit ASTBlockEncoded(u32 start, u32 end) : start{start}, end{end} {}
+    u32 start;
+    u32 end;
+};
+
+/// Block holding an already decoded NodeBlock (see ASTBase::TransformBlockEncoded).
+class ASTBlockDecoded {
+public:
+    explicit ASTBlockDecoded(NodeBlock&& new_nodes) : nodes(std::move(new_nodes)) {}
+    NodeBlock nodes;
+};
+
+/// Assigns `condition` to the flow variable number `index`.
+class ASTVarSet {
+public:
+    explicit ASTVarSet(u32 index, Expr condition) : index{index}, condition{std::move(condition)} {}
+    u32 index;
+    Expr condition;
+};
+
+/// Jump target. `unused` starts as false and is raised through ASTBase::MarkLabelUnused.
+class ASTLabel {
+public:
+    explicit ASTLabel(u32 index) : index{index} {}
+    u32 index;
+    bool unused{};
+};
+
+/// Conditional jump; `label` is the index of the target in ASTManager's label list.
+class ASTGoto {
+public:
+    explicit ASTGoto(Expr condition, u32 label) : condition{std::move(condition)}, label{label} {}
+    Expr condition;
+    u32 label;
+};
+
+/// Loop node. ASTManager::EncloseDoWhile builds it from the nodes between a label and a goto
+/// that targets it, reusing the goto condition as the loop condition.
+class ASTDoWhile {
+public:
+    explicit ASTDoWhile(Expr condition) : condition{std::move(condition)} {}
+    Expr condition;
+    ASTZipper nodes{};
+};
+
+/// Conditional return. NOTE(review): `kills` presumably marks a discard/kill - confirm.
+class ASTReturn {
+public:
+    explicit ASTReturn(Expr condition, bool kills)
+        : condition{std::move(condition)}, kills{kills} {}
+    Expr condition;
+    bool kills;
+};
+
+/// Conditional break; ASTManager::MoveOutward inserts it when a goto is moved out of a loop.
+class ASTBreak {
+public:
+    explicit ASTBreak(Expr condition) : condition{std::move(condition)} {}
+    Expr condition;
+};
+
+/// A node of the tree: the payload plus links to its parent, its siblings and the list it is in.
+class ASTBase {
+public:
+    explicit ASTBase(ASTNode parent, ASTData data)
+        : data{std::move(data)}, parent{std::move(parent)} {}
+
+    /// Creates a node under `parent` whose payload is a `U` constructed from `args`.
+    template <class U, class... Args>
+    static ASTNode Make(ASTNode parent, Args&&... args) {
+        return std::make_shared<ASTBase>(std::move(parent),
+                                         ASTData(U(std::forward<Args>(args)...)));
+    }
+
+    void SetParent(ASTNode new_parent) {
+        parent = std::move(new_parent);
+    }
+
+    ASTNode& GetParent() {
+        return parent;
+    }
+
+    const ASTNode& GetParent() const {
+        return parent;
+    }
+
+    /// Number of ancestors of this node (0 when it has no parent).
+    u32 GetLevel() const {
+        u32 level = 0;
+        auto next_parent = parent;
+        while (next_parent) {
+            next_parent = next_parent->GetParent();
+            level++;
+        }
+        return level;
+    }
+
+    ASTData* GetInnerData() {
+        return &data;
+    }
+
+    const ASTData* GetInnerData() const {
+        return &data;
+    }
+
+    ASTNode GetNext() const {
+        return next;
+    }
+
+    ASTNode GetPrevious() const {
+        return previous;
+    }
+
+    /// List this node is linked into. The pointer is dereferenced without a check, so this must
+    /// not be called while `manager` is null (e.g. after Clear()).
+    ASTZipper& GetManager() {
+        return *manager;
+    }
+
+    const ASTZipper& GetManager() const {
+        return *manager;
+    }
+
+    /// Label index targeted by this goto; empty when the node is not a goto.
+    std::optional<u32> GetGotoLabel() const {
+        auto inner = std::get_if<ASTGoto>(&data);
+        if (inner) {
+            return {inner->label};
+        }
+        return {};
+    }
+
+    /// Condition of this goto; nullptr when the node is not a goto.
+    Expr GetGotoCondition() const {
+        auto inner = std::get_if<ASTGoto>(&data);
+        if (inner) {
+            return inner->condition;
+        }
+        return nullptr;
+    }
+
+    /// Flags this label as unused; does nothing when the node is not a label.
+    void MarkLabelUnused() {
+        auto inner = std::get_if<ASTLabel>(&data);
+        if (inner) {
+            inner->unused = true;
+        }
+    }
+
+    /// Unused flag of this label. Nodes that are not labels report true.
+    bool IsLabelUnused() const {
+        auto inner = std::get_if<ASTLabel>(&data);
+        if (inner) {
+            return inner->unused;
+        }
+        return true;
+    }
+
+    /// Index of this label; empty when the node is not a label.
+    std::optional<u32> GetLabelIndex() const {
+        auto inner = std::get_if<ASTLabel>(&data);
+        if (inner) {
+            return {inner->index};
+        }
+        return {};
+    }
+
+    /// Condition of this if-then node; nullptr for every other node kind.
+    Expr GetIfCondition() const {
+        auto inner = std::get_if<ASTIfThen>(&data);
+        if (inner) {
+            return inner->condition;
+        }
+        return nullptr;
+    }
+
+    /// Replaces the condition of this goto; does nothing when the node is not a goto.
+    void SetGotoCondition(Expr new_condition) {
+        auto inner = std::get_if<ASTGoto>(&data);
+        if (inner) {
+            inner->condition = std::move(new_condition);
+        }
+    }
+
+    bool IsIfThen() const {
+        return std::holds_alternative<ASTIfThen>(data);
+    }
+
+    bool IsIfElse() const {
+        return std::holds_alternative<ASTIfElse>(data);
+    }
+
+    bool IsBlockEncoded() const {
+        return std::holds_alternative<ASTBlockEncoded>(data);
+    }
+
+    /// Replaces the payload with an ASTBlockDecoded built from `nodes`. The current payload kind
+    /// is not checked here; callers are expected to test IsBlockEncoded() first.
+    void TransformBlockEncoded(NodeBlock&& nodes) {
+        data = ASTBlockDecoded(std::move(nodes));
+    }
+
+    bool IsLoop() const {
+        return std::holds_alternative<ASTDoWhile>(data);
+    }
+
+    /// Child list of a program, if-then, if-else or do-while node; nullptr for any other kind.
+    ASTZipper* GetSubNodes() {
+        if (std::holds_alternative<ASTProgram>(data)) {
+            return &std::get_if<ASTProgram>(&data)->nodes;
+        }
+        if (std::holds_alternative<ASTIfThen>(data)) {
+            return &std::get_if<ASTIfThen>(&data)->nodes;
+        }
+        if (std::holds_alternative<ASTIfElse>(data)) {
+            return &std::get_if<ASTIfElse>(&data)->nodes;
+        }
+        if (std::holds_alternative<ASTDoWhile>(data)) {
+            return &std::get_if<ASTDoWhile>(&data)->nodes;
+        }
+        return nullptr;
+    }
+
+    /// Drops every link held by this node (siblings, parent, owning list); the payload is kept.
+    void Clear() {
+        next.reset();
+        previous.reset();
+        parent.reset();
+        manager = nullptr;
+    }
+
+private:
+    friend class ASTZipper;
+
+    ASTData data;
+    ASTNode parent{};
+    ASTNode next{};
+    ASTNode previous{};
+    ASTZipper* manager{};
+};
+
+/// Holds the tree together with its labels and pending gotos, and hosts the transformations
+/// (EncloseDoWhile, EncloseIfThen, MoveOutward) that replace gotos with structured nodes.
+class ASTManager final {
+public:
+    ASTManager(bool full_decompile, bool disable_else_derivation);
+    ~ASTManager();
+
+    ASTManager(const ASTManager& o) = delete;
+    ASTManager& operator=(const ASTManager& other) = delete;
+
+    ASTManager(ASTManager&& other) noexcept = default;
+    ASTManager& operator=(ASTManager&& other) noexcept = default;
+
+    void Init();
+
+    void DeclareLabel(u32 address);
+
+    void InsertLabel(u32 address);
+
+    void InsertGoto(Expr condition, u32 address);
+
+    void InsertBlock(u32 start_address, u32 end_address);
+
+    void InsertReturn(Expr condition, bool kills);
+
+    std::string Print() const;
+
+    void Decompile();
+
+    /// Logs the printed tree under the given state name and then runs SanityCheck().
+    void ShowCurrentState(std::string_view state) const;
+
+    /// Logs an error for every label that has no parent.
+    void SanityCheck() const;
+
+    /// Unlinks every node reachable from the main node and resets labels and gotos.
+    void Clear();
+
+    /// With full_decompile set this succeeds only when no goto is left. Otherwise gotos may
+    /// remain, provided each of them carries a label and none of them jumps backwards.
+    bool IsFullyDecompiled() const {
+        if (full_decompile) {
+            return gotos.empty();
+        }
+
+        for (ASTNode goto_node : gotos) {
+            auto label_index = goto_node->GetGotoLabel();
+            if (!label_index) {
+                return false;
+            }
+            ASTNode glabel = labels[*label_index];
+            if (IsBackwardsJump(goto_node, glabel)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    ASTNode GetProgram() const {
+        return main_node;
+    }
+
+    /// Number of flow variables handed out by NewVariable() so far.
+    u32 GetVariables() const {
+        return variables;
+    }
+
+    const std::vector<ASTNode>& GetLabels() const {
+        return labels;
+    }
+
+private:
+    /// True when, after lifting both nodes to a common parent, the label precedes the goto.
+    bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
+
+    /// True when the nodes neither share a parent nor are directly related.
+    bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
+
+    /// True when the parents differ, but lifting the deeper node to the level of the shallower
+    /// one yields a node with the same parent as the shallower one.
+    bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
+
+    void EncloseDoWhile(ASTNode goto_node, ASTNode label);
+
+    void EncloseIfThen(ASTNode goto_node, ASTNode label);
+
+    void MoveOutward(ASTNode goto_node);
+
+    /// Hands out the next unused flow variable index.
+    u32 NewVariable() {
+        return variables++;
+    }
+
+    bool full_decompile{};
+    bool disable_else_derivation{};
+    std::unordered_map<u32, u32> labels_map{};
+    u32 labels_count{};
+    std::vector<ASTNode> labels{};
+    std::list<ASTNode> gotos{};
+    u32 variables{};
+    ASTProgram* program{};
+    ASTNode main_node{};
+    Expr false_condition{};
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
new file mode 100644
index 000000000..cddcbd4f0
--- /dev/null
+++ b/src/video_core/shader/compiler_settings.cpp
@@ -0,0 +1,26 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/shader/compiler_settings.h"
+
+namespace VideoCommon::Shader {
+
+std::string CompileDepthAsString(const CompileDepth cd) {
+    switch (cd) {
+    case CompileDepth::BruteForce:
+        return "Brute Force Compile";
+    case CompileDepth::FlowStack:
+        return "Simple Flow Stack Mode";
+    case CompileDepth::NoFlowStack:
+        return "Remove Flow Stack";
+    case CompileDepth::DecompileBackwards:
+        return "Decompile Backward Jumps";
+    case CompileDepth::FullDecompile:
+        return "Full Decompilation";
+    default:
+        return "Unknown Compiler Process";
+    }
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
new file mode 100644
index 000000000..916018c01
--- /dev/null
+++ b/src/video_core/shader/compiler_settings.h
@@ -0,0 +1,32 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include "video_core/engines/shader_bytecode.h"
+
+namespace VideoCommon::Shader {
+
+/// Selects one of the shader compilation strategies named by the enumerators.
+enum class CompileDepth : u32 {
+    BruteForce = 0,
+    FlowStack = 1,
+    NoFlowStack = 2,
+    DecompileBackwards = 3,
+    FullDecompile = 4,
+};
+
+/// Returns a human readable name for the given depth, or "Unknown Compiler Process" when the
+/// value matches none of the enumerators.
+std::string CompileDepthAsString(CompileDepth cd);
+
+/// Options for a shader compilation.
+struct CompilerSettings {
+    CompileDepth depth{CompileDepth::NoFlowStack}; ///< Defaults to NoFlowStack.
+    bool disable_else_derivation{true};            ///< Defaults to true.
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
new file mode 100644
index 000000000..fe467608e
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -0,0 +1,110 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <utility>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/const_buffer_locker.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Engines::SamplerDescriptor;
+
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
+    : stage{shader_stage} {}
+
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+                                     Tegra::Engines::ConstBufferEngineInterface& engine)
+    : stage{shader_stage}, engine{&engine} {}
+
+ConstBufferLocker::~ConstBufferLocker() = default;
+
+std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
+    const std::pair<u32, u32> key = {buffer, offset};
+    const auto iter = keys.find(key);
+    if (iter != keys.end()) {
+        return iter->second; // Already registered: answer with the stored value.
+    }
+    if (!engine) {
+        return std::nullopt; // Unknown key and no engine to ask.
+    }
+    const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
+    keys.emplace(key, value); // Register what the engine returned for later lookups.
+    return value;
+}
+
+std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
+    const u32 key = offset;
+    const auto iter = bound_samplers.find(key);
+    if (iter != bound_samplers.end()) {
+        return iter->second; // Already registered.
+    }
+    if (!engine) {
+        return std::nullopt; // Unknown sampler and no engine to ask.
+    }
+    const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
+    bound_samplers.emplace(key, value); // Register for later lookups.
+    return value;
+}
+
+std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
+    u32 buffer, u32 offset) {
+    const std::pair key = {buffer, offset};
+    const auto iter = bindless_samplers.find(key);
+    if (iter != bindless_samplers.end()) {
+        return iter->second; // Already registered.
+    }
+    if (!engine) {
+        return std::nullopt; // Unknown sampler and no engine to ask.
+    }
+    const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
+    bindless_samplers.emplace(key, value); // Register for later lookups.
+    return value;
+}
+
+void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
+    keys.insert_or_assign({buffer, offset}, value);
+}
+
+void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
+    bound_samplers.insert_or_assign(offset, sampler);
+}
+
+void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
+    bindless_samplers.insert_or_assign({buffer, offset}, sampler);
+}
+
+bool ConstBufferLocker::IsConsistent() const {
+    if (!engine) {
+        return false; // Without an engine there is nothing to compare against.
+    }
+    return std::all_of(keys.begin(), keys.end(), // Stored values must match the engine.
+                       [this](const auto& pair) {
+                           const auto [cbuf, offset] = pair.first;
+                           const auto value = pair.second;
+                           return value == engine->AccessConstBuffer32(stage, cbuf, offset);
+                       }) &&
+           std::all_of(bound_samplers.begin(), bound_samplers.end(),
+                       [this](const auto& sampler) {
+                           const auto [key, value] = sampler;
+                           return value == engine->AccessBoundSampler(stage, key);
+                       }) &&
+           std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
+                       [this](const auto& sampler) {
+                           const auto [cbuf, offset] = sampler.first;
+                           const auto value = sampler.second;
+                           return value == engine->AccessBindlessSampler(stage, cbuf, offset);
+                       });
+}
+
+bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
+    return keys == rhs.keys && bound_samplers == rhs.bound_samplers &&
+           bindless_samplers == rhs.bindless_samplers;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
new file mode 100644
index 000000000..600e2f3c3
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#pragma once + +#include <unordered_map> +#include "common/common_types.h" +#include "common/hash.h" +#include "video_core/engines/const_buffer_engine_interface.h" + +namespace VideoCommon::Shader { + +using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; +using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; +using BindlessSamplerMap = + std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; + +/** + * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader + * compiler. With it, the shader can obtain required data from GPU state and store it for disk + * shader compilation. + **/ +class ConstBufferLocker { +public: + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); + + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine); + + ~ConstBufferLocker(); + + /// Retrieves a key from the locker. If it's registered, it will give the registered value; if + /// not, it will obtain it from the engine and register it. + std::optional<u32> ObtainKey(u32 buffer, u32 offset); + + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); + + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); + + /// Inserts a key. + void InsertKey(u32 buffer, u32 offset, u32 value); + + /// Inserts a bound sampler key. + void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Inserts a bindless sampler key. + void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Checks keys and samplers against engine's current const buffers. Returns true if they are + /// the same value, false otherwise. + bool IsConsistent() const; + + /// Returns true if the keys are equal to the other ones in the locker. 
+ bool HasEqualKeys(const ConstBufferLocker& rhs) const; + + /// Gives a getter to the const buffer keys in the database. + const KeyMap& GetKeys() const { + return keys; + } + + /// Gets samplers database. + const BoundSamplerMap& GetBoundSamplers() const { + return bound_samplers; + } + + /// Gets bindless samplers database. + const BindlessSamplerMap& GetBindlessSamplers() const { + return bindless_samplers; + } + +private: + const Tegra::Engines::ShaderType stage; + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + KeyMap keys; + BoundSamplerMap bound_samplers; + BindlessSamplerMap bindless_samplers; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index ec3a76690..b427ac873 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -4,18 +4,21 @@ #include <list> #include <map> +#include <set> #include <stack> #include <unordered_map> -#include <unordered_set> #include <vector> #include "common/assert.h" #include "common/common_types.h" +#include "video_core/shader/ast.h" #include "video_core/shader/control_flow.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { + namespace { + using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -34,14 +37,20 @@ struct BlockStack { std::stack<u32> pbk_stack{}; }; -struct BlockBranchInfo { - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; +template <typename T, typename... Args> +BlockBranchInfo MakeBranchInfo(Args&&... 
args) { + static_assert(std::is_convertible_v<T, BranchData>); + return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); +} + +bool BlockBranchIsIgnored(BlockBranchInfo first) { + bool ignore = false; + if (std::holds_alternative<SingleBranch>(*first)) { + const auto branch = std::get_if<SingleBranch>(first.get()); + ignore = branch->ignore; + } + return ignore; +} struct BlockInfo { u32 start{}; @@ -55,21 +64,21 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, - const u32 start) - : start{start}, program_code{program_code}, program_size{program_size} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) + : program_code{program_code}, start{start}, locker{locker} {} - u32 start{}; - std::vector<BlockInfo> block_info{}; - std::list<u32> inspect_queries{}; - std::list<Query> queries{}; - std::unordered_map<u32, u32> registered{}; - std::unordered_set<u32> labels{}; - std::map<u32, u32> ssy_labels{}; - std::map<u32, u32> pbk_labels{}; - std::unordered_map<u32, BlockStack> stacks{}; const ProgramCode& program_code; - const std::size_t program_size; + ConstBufferLocker& locker; + u32 start{}; + std::vector<BlockInfo> block_info; + std::list<u32> inspect_queries; + std::list<Query> queries; + std::unordered_map<u32, u32> registered; + std::set<u32> labels; + std::map<u32, u32> ssy_labels; + std::map<u32, u32> pbk_labels; + std::unordered_map<u32, BlockStack> stacks; + ASTManager* manager{}; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -102,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { } Pred GetPredicate(u32 index, bool negated) { - return static_cast<Pred>(index + (negated ? 8 : 0)); + return static_cast<Pred>(static_cast<u64>(index) + (negated ? 
8ULL : 0ULL)); } /** @@ -122,10 +131,122 @@ enum class ParseResult : u32 { AbnormalFlow, }; +struct BranchIndirectInfo { + u32 buffer{}; + u32 offset{}; + u32 entries{}; + s32 relative_position{}; +}; + +struct BufferInfo { + u32 index; + u32 offset; +}; + +std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { + const Instruction instr = state.program_code[pos]; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() != OpCode::Id::BRX) { + return std::nullopt; + } + if (instr.brx.constant_buffer != 0) { + return std::nullopt; + } + --pos; + return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); +} + +template <typename Result, typename TestCallable, typename PackCallable> +// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> +// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> +std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, + PackCallable pack) { + for (; pos >= state.start; --pos) { + if (IsSchedInstruction(pos, state.start)) { + continue; + } + const Instruction instr = state.program_code[pos]; + const auto opcode = OpCode::Decode(instr); + if (!opcode) { + continue; + } + if (test(instr, opcode->get())) { + --pos; + return std::make_optional(pack(instr, opcode->get())); + } + } + return std::nullopt; +} + +std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, + u64 brx_tracked_register) { + return TrackInstruction<std::pair<BufferInfo, u64>>( + state, pos, + [brx_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::LD_C && + instr.gpr0.Value() == brx_tracked_register && + instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; + }, + [](auto instr, const auto& opcode) { + const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), + static_cast<u32>(instr.cbuf36.GetOffset())}; + return 
std::make_pair(info, instr.gpr8.Value()); + }); +} + +std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, + u64 ldc_tracked_register) { + return TrackInstruction<u64>(state, pos, + [ldc_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::SHL_IMM && + instr.gpr0.Value() == ldc_tracked_register; + }, + [](auto instr, const auto&) { return instr.gpr8.Value(); }); +} + +std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, + u64 shl_tracked_register) { + return TrackInstruction<u32>(state, pos, + [shl_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::IMNMX_IMM && + instr.gpr0.Value() == shl_tracked_register; + }, + [](auto instr, const auto&) { + return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); + }); +} + +std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { + const auto brx_info = GetBRXInfo(state, pos); + if (!brx_info) { + return std::nullopt; + } + const auto [relative_position, brx_tracked_register] = *brx_info; + + const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); + if (!ldc_info) { + return std::nullopt; + } + const auto [buffer_info, ldc_tracked_register] = *ldc_info; + + const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); + if (!shl_tracked_register) { + return std::nullopt; + } + + const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); + if (!entries) { + return std::nullopt; + } + + return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; +} + std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { u32 offset = static_cast<u32>(address); - const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); + const u32 end_address = static_cast<u32>(state.program_code.size()); ParseInfo parse_info{}; + SingleBranch single_branch{}; 
const auto insert_label = [](CFGRebuildState& state, u32 address) { const auto pair = state.labels.emplace(address); @@ -138,13 +259,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) if (offset >= end_address) { // ASSERT_OR_EXECUTE can't be used, as it ignores the break ASSERT_MSG(false, "Shader passed the current limit!"); - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.ignore = false; + + single_branch.address = exit_branch; + single_branch.ignore = false; break; } if (state.registered.count(offset) != 0) { - parse_info.branch_info.address = offset; - parse_info.branch_info.ignore = true; + single_branch.address = offset; + single_branch.ignore = true; break; } if (IsSchedInstruction(offset, state.start)) { @@ -161,24 +283,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) switch (opcode->get().GetId()) { case OpCode::Id::EXIT: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = 
MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -187,99 +311,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) return {ParseResult::AbnormalFlow, parse_info}; } const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } const u32 branch_offset = offset + instr.bra.GetBranchTarget(); if (branch_offset == 0) { - parse_info.branch_info.address = exit_branch; + single_branch.address = exit_branch; } else { - parse_info.branch_info.address = branch_offset; + single_branch.address = branch_offset; } insert_label(state, branch_offset); - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::SYNC: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - 
GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = true; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = true; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::BRK: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = true; - parse_info.branch_info.ignore = false; + 
single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = true; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::KIL: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = true; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = true; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -296,7 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) break; } case OpCode::Id::BRX: { - return {ParseResult::AbnormalFlow, parse_info}; + const auto tmp = TrackBranchIndirectInfo(state, offset); + if (!tmp) { + 
LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); + return {ParseResult::AbnormalFlow, parse_info}; + } + + const auto result = *tmp; + const s32 pc_target = offset + result.relative_position; + std::vector<CaseBranch> branches; + for (u32 i = 0; i < result.entries; i++) { + auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + if (!key) { + return {ParseResult::AbnormalFlow, parse_info}; + } + u32 value = *key; + u32 target = static_cast<u32>((value >> 3) + pc_target); + insert_label(state, target); + branches.emplace_back(value, target); + } + parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<MultiBranch>( + static_cast<u32>(instr.gpr8.Value()), std::move(branches)); + + return {ParseResult::ControlCaught, parse_info}; } default: break; @@ -304,10 +459,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) offset++; } - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; parse_info.end_address = offset - 1; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, + single_branch.is_brk, single_branch.ignore); return {ParseResult::BlockEnd, parse_info}; } @@ -331,9 +489,10 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; new_block.branch = current_block.branch; - BlockBranchInfo forward_branch{}; - forward_branch.address = address; - forward_branch.ignore = true; + BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); + const auto branch = std::get_if<SingleBranch>(forward_branch.get()); + branch->address = address; + branch->ignore = true; current_block.branch = forward_branch; return true; } @@ -348,12 +507,15 @@ bool 
TryInspectAddress(CFGRebuildState& state) { BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); block_info.branch = parse_info.branch_info; - if (parse_info.branch_info.condition.IsUnconditional()) { + if (std::holds_alternative<SingleBranch>(*block_info.branch)) { + const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); + if (branch->condition.IsUnconditional()) { + return true; + } + const u32 fallthrough_address = parse_info.end_address + 1; + state.inspect_queries.push_front(fallthrough_address); return true; } - - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); return true; } @@ -391,91 +553,205 @@ bool TryQuery(CFGRebuildState& state) { state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (!block.branch.condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } + if (std::holds_alternative<SingleBranch>(*block.branch)) { + const auto branch = std::get_if<SingleBranch>(block.branch.get()); + if (!branch->condition.IsUnconditional()) { + q2.address = block.end + 1; + state.queries.push_back(q2); + } - Query conditional_query{q2}; - if (block.branch.is_sync) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.ssy_stack.top(); + Query conditional_query{q2}; + if (branch->is_sync) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.ssy_stack.top(); + } + conditional_query.ssy_stack.pop(); } - conditional_query.ssy_stack.pop(); - } - if (block.branch.is_brk) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.pbk_stack.top(); + if (branch->is_brk) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.pbk_stack.top(); + } + conditional_query.pbk_stack.pop(); } - 
conditional_query.pbk_stack.pop(); + conditional_query.address = branch->address; + state.queries.push_back(std::move(conditional_query)); + return true; + } + const auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + for (const auto& branch_case : multi_branch->branches) { + Query conditional_query{q2}; + conditional_query.address = branch_case.address; + state.queries.push_back(std::move(conditional_query)); } - conditional_query.address = block.branch.address; - state.queries.push_back(std::move(conditional_query)); return true; } + } // Anonymous namespace -std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address) { - CFGRebuildState state{program_code, program_size, start_address}; +void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { + const auto get_expr = ([&](const Condition& cond) -> Expr { + Expr result{}; + if (cond.cc != ConditionCode::T) { + result = MakeExpr<ExprCondCode>(cond.cc); + } + if (cond.predicate != Pred::UnusedIndex) { + u32 pred = static_cast<u32>(cond.predicate); + bool negate = false; + if (pred > 7) { + negate = true; + pred -= 8; + } + Expr extra = MakeExpr<ExprPredicate>(pred); + if (negate) { + extra = MakeExpr<ExprNot>(extra); + } + if (result) { + return MakeExpr<ExprAnd>(extra, result); + } + return extra; + } + if (result) { + return result; + } + return MakeExpr<ExprBoolean>(true); + }); + if (std::holds_alternative<SingleBranch>(*branch_info)) { + const auto branch = std::get_if<SingleBranch>(branch_info.get()); + if (branch->address < 0) { + if (branch->kill) { + mm.InsertReturn(get_expr(branch->condition), true); + return; + } + mm.InsertReturn(get_expr(branch->condition), false); + return; + } + mm.InsertGoto(get_expr(branch->condition), branch->address); + return; + } + const auto multi_branch = std::get_if<MultiBranch>(branch_info.get()); + for (const auto& branch_case : multi_branch->branches) { + 
mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), + branch_case.address); + } +} + +void DecompileShader(CFGRebuildState& state) { + state.manager->Init(); + for (auto label : state.labels) { + state.manager->DeclareLabel(label); + } + for (auto& block : state.block_info) { + if (state.labels.count(block.start) != 0) { + state.manager->InsertLabel(block.start); + } + const bool ignore = BlockBranchIsIgnored(block.branch); + u32 end = ignore ? block.end + 1 : block.end; + state.manager->InsertBlock(block.start, end); + if (!ignore) { + InsertBranch(*state.manager, block.branch); + } + } + state.manager->Decompile(); +} + +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, + const CompilerSettings& settings, + ConstBufferLocker& locker) { + auto result_out = std::make_unique<ShaderCharacteristics>(); + if (settings.depth == CompileDepth::BruteForce) { + result_out->settings.depth = CompileDepth::BruteForce; + return result_out; + } + CFGRebuildState state{program_code, start_address, locker}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); state.inspect_queries.push_back(state.start); while (!state.inspect_queries.empty()) { if (!TryInspectAddress(state)) { - return {}; + result_out->settings.depth = CompileDepth::BruteForce; + return result_out; } } - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - bool decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; + bool use_flow_stack = true; + + bool decompiled = false; + + if (settings.depth != CompileDepth::FlowStack) { + // Decompile Stacks + state.queries.push_back(Query{state.start, {}, {}}); + decompiled = true; + while (!state.queries.empty()) { + if (!TryQuery(state)) { + decompiled = false; + break; + } } } + use_flow_stack = !decompiled; + // Sort and organize results std::sort(state.block_info.begin(), 
state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; }); - ShaderCharacteristics result_out{}; - result_out.decompilable = decompiled; - result_out.start = start_address; - result_out.end = start_address; - for (const auto& block : state.block_info) { + [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); + if (decompiled && settings.depth != CompileDepth::NoFlowStack) { + ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, + settings.disable_else_derivation}; + state.manager = &manager; + DecompileShader(state); + decompiled = state.manager->IsFullyDecompiled(); + if (!decompiled) { + if (settings.depth == CompileDepth::FullDecompile) { + LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); + } else { + LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); + } + state.manager->ShowCurrentState("Of Shader"); + state.manager->Clear(); + } else { + auto characteristics = std::make_unique<ShaderCharacteristics>(); + characteristics->start = start_address; + characteristics->settings.depth = settings.depth; + characteristics->manager = std::move(manager); + characteristics->end = state.block_info.back().end + 1; + return characteristics; + } + } + + result_out->start = start_address; + result_out->settings.depth = + use_flow_stack ? 
CompileDepth::FlowStack : CompileDepth::NoFlowStack; + result_out->blocks.clear(); + for (auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; - new_block.ignore_branch = block.branch.ignore; + new_block.ignore_branch = BlockBranchIsIgnored(block.branch); if (!new_block.ignore_branch) { - new_block.branch.cond = block.branch.condition; - new_block.branch.kills = block.branch.kill; - new_block.branch.address = block.branch.address; + new_block.branch = block.branch; } - result_out.end = std::max(result_out.end, block.end); - result_out.blocks.push_back(new_block); + result_out->end = std::max(result_out->end, block.end); + result_out->blocks.push_back(new_block); } - if (result_out.decompilable) { - result_out.labels = std::move(state.labels); - return {std::move(result_out)}; + if (!use_flow_stack) { + result_out->labels = std::move(state.labels); + return result_out; } - // If it's not decompilable, merge the unlabelled blocks together - auto back = result_out.blocks.begin(); + auto back = result_out->blocks.begin(); auto next = std::next(back); - while (next != result_out.blocks.end()) { + while (next != result_out->blocks.end()) { if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { back->end = next->end; - next = result_out.blocks.erase(next); + next = result_out->blocks.erase(next); continue; } back = next; ++next; } - return {std::move(result_out)}; + + return result_out; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index b0a5e4f8c..5304998b9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -6,9 +6,12 @@ #include <list> #include <optional> -#include <unordered_set> +#include <set> +#include <variant> #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/ast.h" +#include "video_core/shader/compiler_settings.h" #include 
"video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -35,29 +38,61 @@ struct Condition { } }; -struct ShaderBlock { - struct Branch { - Condition cond{}; - bool kills{}; - s32 address{}; +class SingleBranch { +public: + SingleBranch() = default; + SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, + bool ignore) + : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, + ignore{ignore} {} + + bool operator==(const SingleBranch& b) const { + return std::tie(condition, address, kill, is_sync, is_brk, ignore) == + std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); + } + + bool operator!=(const SingleBranch& b) const { + return !operator==(b); + } + + Condition condition{}; + s32 address{exit_branch}; + bool kill{}; + bool is_sync{}; + bool is_brk{}; + bool ignore{}; +}; - bool operator==(const Branch& b) const { - return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); - } +struct CaseBranch { + CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} + u32 cmp_value; + u32 address; +}; + +class MultiBranch { +public: + MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches) + : gpr{gpr}, branches{std::move(branches)} {} + + u32 gpr{}; + std::vector<CaseBranch> branches{}; +}; + +using BranchData = std::variant<SingleBranch, MultiBranch>; +using BlockBranchInfo = std::shared_ptr<BranchData>; - bool operator!=(const Branch& b) const { - return !operator==(b); - } - }; +bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); +struct ShaderBlock { u32 start{}; u32 end{}; bool ignore_branch{}; - Branch branch{}; + BlockBranchInfo branch{}; bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch, branch) == - std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); + return std::tie(start, end, ignore_branch) == + std::tie(sb.start, sb.end, sb.ignore_branch) && + 
BlockBranchInfoAreEqual(branch, sb.branch); } bool operator!=(const ShaderBlock& sb) const { @@ -67,13 +102,15 @@ struct ShaderBlock { struct ShaderCharacteristics { std::list<ShaderBlock> blocks{}; - bool decompilable{}; + std::set<u32> labels{}; u32 start{}; u32 end{}; - std::unordered_set<u32> labels{}; + ASTManager manager{true, true}; + CompilerSettings settings{}; }; -std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address); +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, + const CompilerSettings& settings, + ConstBufferLocker& locker); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 47a9fd961..22c3e5120 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -33,60 +33,140 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -} // namespace +} // Anonymous namespace + +class ASTDecoder { +public: + ASTDecoder(ShaderIR& ir) : ir(ir) {} + + void operator()(ASTProgram& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(ASTIfThen& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(ASTIfElse& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(ASTBlockEncoded& ast) {} + + void operator()(ASTBlockDecoded& ast) {} + + void operator()(ASTVarSet& ast) {} + + void operator()(ASTLabel& ast) {} + + void operator()(ASTGoto& ast) {} + + void operator()(ASTDoWhile& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void 
operator()(ASTReturn& ast) {} + + void operator()(ASTBreak& ast) {} + + void Visit(ASTNode& node) { + std::visit(*this, *node->GetInnerData()); + if (node->IsBlockEncoded()) { + auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); + NodeBlock bb = ir.DecodeRange(block->start, block->end); + node->TransformBlockEncoded(std::move(bb)); + } + } + +private: + ShaderIR& ir; +}; void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - disable_flow_stack = false; - const auto info = ScanFlow(program_code, program_size, main_offset); - if (info) { - const auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - if (shader_info.decompilable) { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast<u32>(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast<u32>(exit_branch); - for (auto& block : blocks) { - if (shader_info.labels.count(block.start) != 0) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - return; - } - LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! 
Falling back to old method"); - // we can't decompile it, fallback to standard method + decompiled = false; + auto info = ScanFlow(program_code, main_offset, settings, locker); + auto& shader_info = *info; + coverage_begin = shader_info.start; + coverage_end = shader_info.end; + switch (shader_info.settings.depth) { + case CompileDepth::FlowStack: { for (const auto& block : shader_info.blocks) { basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); } - return; + break; } - LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); - - // Now we need to deal with an undecompilable shader. We need to brute force - // a shader that captures every position. - coverage_begin = main_offset; - const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; label++) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); + case CompileDepth::NoFlowStack: { + disable_flow_stack = true; + const auto insert_block = [this](NodeBlock& nodes, u32 label) { + if (label == static_cast<u32>(exit_branch)) { + return; + } + basic_blocks.insert({label, nodes}); + }; + const auto& blocks = shader_info.blocks; + NodeBlock current_block; + u32 current_label = static_cast<u32>(exit_branch); + for (auto& block : blocks) { + if (shader_info.labels.count(block.start) != 0) { + insert_block(current_block, current_label); + current_block.clear(); + current_label = block.start; + } + if (!block.ignore_branch) { + DecodeRangeInner(current_block, block.start, block.end); + InsertControlFlow(current_block, block); + } else { + DecodeRangeInner(current_block, block.start, block.end + 1); + } + } + insert_block(current_block, current_label); + break; + } + case CompileDepth::DecompileBackwards: + case CompileDepth::FullDecompile: { + program_manager = std::move(shader_info.manager); + disable_flow_stack = true; + decompiled = true; + ASTDecoder decoder{*this}; 
+ ASTNode program = GetASTProgram(); + decoder.Visit(program); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); + [[fallthrough]]; + case CompileDepth::BruteForce: { + const auto shader_end = static_cast<u32>(program_code.size()); + coverage_begin = main_offset; + coverage_end = shader_end; + for (u32 label = main_offset; label < shader_end; ++label) { + basic_blocks.insert({label, DecodeRange(label, label + 1)}); + } + break; + } + } + if (settings.depth != shader_info.settings.depth) { + LOG_WARNING( + HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", + CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); } } @@ -118,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { } return result; }; - if (block.branch.address < 0) { - if (block.branch.kills) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(block.branch.cond, n); + if (std::holds_alternative<SingleBranch>(*block.branch)) { + auto branch = std::get_if<SingleBranch>(block.branch.get()); + if (branch->address < 0) { + if (branch->kill) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(branch->condition, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(block.branch.cond, n); + Node n = Operation(OperationCode::Branch, Immediate(branch->address)); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); - n = apply_conditions(block.branch.cond, n); - bb.push_back(n); - global_code.push_back(n); + auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + Node op_a = 
GetRegister(multi_branch->gpr); + for (auto& branch_case : multi_branch->branches) { + Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); + Node op_b = Immediate(branch_case.cmp_value); + Node condition = + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + auto result = Conditional(condition, {n}); + bb.push_back(result); + global_code.push_back(result); + } } u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 1473c282a..fcedd2af6 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. if (instr.fmul.tab5cb8_2 != 0) { - LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); + LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); } if (instr.fmul.tab5c68_0 != 1) { - LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); + LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); @@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::RRO_C: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { + LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); + // Currently RRO is only implemented as a register move. 
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); SetRegister(bb, instr.gpr0, op_b); - LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; } default: diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index b06cbe441..ee7d9a29d 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { if (opcode->get().GetId() == OpCode::Id::HADD2_C || opcode->get().GetId() == OpCode::Id::HADD2_R) { - if (instr.alu_half.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 6466fc011..d179b9873 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half_imm.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp 
index b73f6536e..a33d242e9 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::ICMP_IMM: { const Node zero = Immediate(0); - const auto [op_b, test] = [&]() -> std::pair<Node, Node> { + const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::ICMP_CR: return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), @@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { return {zero, zero}; } }(); - const Node op_a = GetRegister(instr.gpr8); + const Node op_lhs = GetRegister(instr.gpr8); const Node comparison = GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); break; } case OpCode::Id::LOP_C: diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index ca2f39e8d..5973588d6 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); if (instr.ffma.tab5980_0 != 1) { - LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); } if (instr.ffma.tab5980_1 != 0) { - LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); } const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp 
index 48ca7a4af..848e46874 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (instr.hset2.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.hset2.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 840694527..310655619 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" @@ -18,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - DEBUG_ASSERT(instr.hsetp2.ftz == 0); + if (instr.hsetp2.ftz != 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); @@ -32,6 +35,8 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { h_and = instr.hsetp2.cbuf_and_imm.h_and; op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); + // F32 is hardcoded in hardware + op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); break; case OpCode::Id::HSETP2_IMM: cond = 
instr.hsetp2.cbuf_and_imm.cond; diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 95ec1cdd9..d2fe4ec5d 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -143,39 +143,37 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { } Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset{static_cast<std::size_t>(image.index.Value())}; - if (const auto image = TryUseExistingImage(offset, type)) { - return *image; + const auto offset = static_cast<u32>(image.index.Value()); + + const auto it = + std::find_if(std::begin(used_images), std::end(used_images), + [offset](const Image& entry) { return entry.GetOffset() == offset; }); + if (it != std::end(used_images)) { + ASSERT(!it->IsBindless() && it->GetType() == it->GetType()); + return *it; } - const std::size_t next_index{used_images.size()}; - return used_images.emplace(offset, Image{offset, next_index, type}).first->second; + const auto next_index = static_cast<u32>(used_images.size()); + return used_images.emplace_back(next_index, offset, type); } Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { - const Node image_register{GetRegister(reg)}; - const auto [base_image, cbuf_index, cbuf_offset]{ - TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; - const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; - - if (const auto image = TryUseExistingImage(cbuf_key, type)) { - return *image; - } - - const std::size_t next_index{used_images.size()}; - return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type}) - .first->second; -} - -Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) { - auto it = used_images.find(offset); - if (it == used_images.end()) { - return nullptr; + const Node image_register = GetRegister(reg); + 
const auto [base_image, buffer, offset] = + TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); + + const auto it = + std::find_if(std::begin(used_images), std::end(used_images), + [buffer = buffer, offset = offset](const Image& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != std::end(used_images)) { + ASSERT(it->IsBindless() && it->GetType() == it->GetType()); + return *it; } - auto& image = it->second; - ASSERT(image.GetType() == type); - return &image; + const auto next_index = static_cast<u32>(used_images.size()); + return used_images.emplace_back(next_index, offset, buffer, type); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7923d4d69..335d78146 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, instr, false); + TrackGlobalMemory(bb, instr, false); const u32 count = GetUniformTypeElementsCount(type); + if (!real_address_base || !base_address) { + // Tracking failed, load zeroes. + for (u32 i = 0; i < count; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); + } + break; + } + for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = @@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, instr, true); - - // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} - SetTemporary(bb, 0, real_address_base); + TrackGlobalMemory(bb, instr, true); + if (!real_address_base || !base_address) { + // Tracking failed, skip the store.
+ break; + } const u32 count = GetUniformTypeElementsCount(type); for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); - } - for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - - bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); + const Node value = GetRegister(instr.gpr0.Value() + i); + bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } @@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } -std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_write) { +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, + Instruction instr, + bool is_write) { const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; const auto [base_address, index, offset] = TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_address != nullptr); + ASSERT_OR_EXECUTE_MSG(base_address != nullptr, + { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, + "Global memory tracking failed"); bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46e0f823..17cd45d3c 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::MOV_SYS: { - const Node value = [&]() { + const Node value = [this, instr] { switch (instr.sys20) 
{ case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); @@ -256,7 +256,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::DEPBAR: { - LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); + LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); break; } default: diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index f6ee68a54..d419e9c45 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { + Node op_b = [this, instr] { if (instr.is_b_imm) { return Immediate(instr.alu.GetSignedImm20_20()); } else if (instr.is_b_gpr) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b934a069..bb926a132 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - const TextureType texture_type{instr.tex.texture_type}; const bool is_array = instr.tex.array != 0; const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); @@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - const TextureType texture_type{instr.tex_b.texture_type}; const bool is_array = instr.tex_b.array != 0; const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); @@ -82,10 +74,6 @@ 
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.texs.GetTextureProcessMode(); - if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); - } - const Node4 components = GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); @@ -96,6 +84,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TLD4_B: { + is_bindless = true; + [[fallthrough]]; + } case OpCode::Id::TLD4: { ASSERT(instr.tld4.array == 0); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), @@ -103,24 +95,20 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), "PTP is not implemented"); - if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); - } - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); + const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) + : instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); + const bool is_aoffi = is_bindless ? 
instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) + : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); WriteTexInstructionFloat( - bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); + bb, instr, + GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless)); break; } case OpCode::Id::TLD4S: { UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); - } const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); const Node op_a = GetRegister(instr.gpr8); @@ -141,7 +129,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); const auto& sampler = - GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -150,25 +138,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } - WriteTexsInstructionFloat(bb, instr, values); + WriteTexsInstructionFloat(bb, instr, values, true); break; } case OpCode::Id::TXQ_B: is_bindless = true; [[fallthrough]]; case OpCode::Id::TXQ: { - if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); - } - // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - is_bindless - ? 
GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, - false) - : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); u32 indexer = 0; switch (instr.txq.query_type) { @@ -201,15 +182,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); - if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); - } - auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = is_bindless - ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) - : GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = + is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); std::vector<Node> coords; @@ -250,25 +227,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - if (instr.tld.nodep_flag) { - LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete"); - } - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); break; } case OpCode::Id::TLDS: { - const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; + const TextureType texture_type{instr.tlds.GetTextureType()}; const bool is_array{instr.tlds.IsArrayTexture()}; UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); - } - const Node4 
components = GetTldsCode(instr, texture_type, is_array); if (instr.tlds.fp32_flag) { @@ -285,48 +254,84 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, - bool is_array, bool is_shadow) { - const auto offset = static_cast<std::size_t>(sampler.index.Value()); +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, + std::optional<SamplerInfo> sampler_info) { + const auto offset = static_cast<u32>(sampler.index.Value()); + + TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else if (const auto sampler = locker.ObtainBoundSampler(offset)) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + LOG_WARNING(HW_GPU, "Unknown sampler info"); + type = TextureType::Texture2D; + is_array = false; + is_shadow = false; + } // If this sampler has already been used, return the existing mapping. 
- const auto itr = + const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [&](const Sampler& entry) { return entry.GetOffset() == offset; }); - if (itr != used_samplers.end()) { - ASSERT(itr->GetType() == type && itr->IsArray() == is_array && - itr->IsShadow() == is_shadow); - return *itr; + [offset](const Sampler& entry) { return entry.GetOffset() == offset; }); + if (it != used_samplers.end()) { + ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && + it->IsShadow() == is_shadow); + return *it; } // Otherwise create a new mapping for this sampler - const std::size_t next_index = used_samplers.size(); - const Sampler entry{offset, next_index, type, is_array, is_shadow}; - return *used_samplers.emplace(entry).first; + const auto next_index = static_cast<u32>(used_samplers.size()); + return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow)); } -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, - bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, + std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); - const auto [base_sampler, cbuf_index, cbuf_offset] = + const auto [base_sampler, buffer, offset] = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); ASSERT(base_sampler != nullptr); - const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); + + TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + LOG_WARNING(HW_GPU, "Unknown 
sampler info"); + type = TextureType::Texture2D; + is_array = false; + is_shadow = false; + } // If this sampler has already been used, return the existing mapping. - const auto itr = + const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; }); - if (itr != used_samplers.end()) { - ASSERT(itr->GetType() == type && itr->IsArray() == is_array && - itr->IsShadow() == is_shadow); - return *itr; + [buffer = buffer, offset = offset](const Sampler& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != used_samplers.end()) { + ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && + it->IsShadow() == is_shadow); + return *it; } // Otherwise create a new mapping for this sampler - const std::size_t next_index = used_samplers.size(); - const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow}; - return *used_samplers.emplace(entry).first; + const auto next_index = static_cast<u32>(used_samplers.size()); + return used_samplers.emplace_back( + Sampler(next_index, offset, buffer, type, is_array, is_shadow)); } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -344,14 +349,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const } } -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, - const Node4& components) { +void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, + bool ignore_mask) { // TEXS has two destination registers and a swizzle. 
The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 u32 dest_elem = 0; for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) + if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) continue; SetTemporary(bb, dest_elem++, components[component]); } @@ -411,9 +416,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = is_bindless - ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) - : GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) + : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -553,7 +558,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi) { + bool is_array, bool is_aoffi, bool is_bindless) { const std::size_t coord_count = GetCoordCount(texture_type); // If enabled arrays index is always stored in the gpr8 field @@ -567,6 +572,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de } u64 parameter_register = instr.gpr20.Value(); + + const auto& sampler = + is_bindless + ? 
GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}}) + : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); + std::vector<Node> aoffi; if (is_aoffi) { aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); @@ -577,12 +588,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de dc = GetRegister(parameter_register++); } - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) + : Immediate(static_cast<u32>(instr.tld4.component)); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; + MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component, + element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -610,7 +623,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -646,7 +659,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? 
GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 97fc6f9b1..b047cf870 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { const Node op_a = GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [&]() { + const Node op_b = [this, instr] { if (instr.video.use_register_b) { return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, instr.video.signed_b, instr.video.type_b, diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index a8e481b3c..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; using Tegra::Shader::VoteOperation; namespace { + OperationCode GetOperationCode(VoteOperation vote_op) { switch (vote_op) { case VoteOperation::All: @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { return OperationCode::VoteAll; } } + } // Anonymous namespace u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { @@ -48,47 +50,57 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { case OpCode::Id::SHFL: { Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) : GetRegister(instr.gpr39); - Node width = [&] { - // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has - // been done reversing Nvidia's math. 
It won't work on all cases due to SHFL having - // different parameters that don't properly map to GLSL's interface, but it should work - // for cases emitted by Nvidia's compiler. - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation( - OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), - Immediate(8)); - } else { - return Operation(OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, Immediate(0x201F), - Operation(OperationCode::INegate, std::move(mask))), - Immediate(8)); - } - }(); + Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); + + Node thread_id = Operation(OperationCode::ThreadId); + Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); + Node seg_mask = BitfieldExtract(mask, 8, 16); - const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { + Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); + Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); + Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, + Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); + + Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { switch (instr.shfl.operation) { case ShuffleOperation::Idx: - return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; - case ShuffleOperation::Up: - return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + return Operation(OperationCode::IBitwiseOr, + Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), + min_thread_id); case ShuffleOperation::Down: - return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + return Operation(OperationCode::IAdd, thread_id, index); + case ShuffleOperation::Up: + return Operation(OperationCode::IAdd, thread_id, + Operation(OperationCode::INegate, 
index)); case ShuffleOperation::Bfly: - return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + return Operation(OperationCode::IBitwiseXor, thread_id, index); } - UNREACHABLE_MSG("Invalid SHFL operation: {}", - static_cast<u64>(instr.shfl.operation.Value())); - return {}; + UNREACHABLE(); + return Immediate(0U); }(); - // Setting the predicate before the register is intentional to avoid overwriting. - Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { + if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); + } else { + return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); + } + }(); + + SetPredicate(bb, instr.shfl.pred48, in_bounds); SetRegister( bb, instr.gpr0, - Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); + break; + } + case OpCode::Id::FSWZADD: { + UNIMPLEMENTED_IF(instr.fswzadd.ndv); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); break; } default: diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp new file mode 100644 index 000000000..2647865d4 --- /dev/null +++ b/src/video_core/shader/expr.cpp @@ -0,0 +1,93 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <memory> +#include <variant> + +#include "video_core/shader/expr.h" + +namespace VideoCommon::Shader { +namespace { +bool ExprIsBoolean(const Expr& expr) { + return std::holds_alternative<ExprBoolean>(*expr); +} + +bool ExprBooleanGet(const Expr& expr) { + return std::get_if<ExprBoolean>(expr.get())->value; +} +} // Anonymous namespace + +bool ExprAnd::operator==(const ExprAnd& b) const { + return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); +} + +bool ExprAnd::operator!=(const ExprAnd& b) const { + return !operator==(b); +} + +bool ExprOr::operator==(const ExprOr& b) const { + return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); +} + +bool ExprOr::operator!=(const ExprOr& b) const { + return !operator==(b); +} + +bool ExprNot::operator==(const ExprNot& b) const { + return *operand1 == *b.operand1; +} + +bool ExprNot::operator!=(const ExprNot& b) const { + return !operator==(b); +} + +Expr MakeExprNot(Expr first) { + if (std::holds_alternative<ExprNot>(*first)) { + return std::get_if<ExprNot>(first.get())->operand1; + } + return MakeExpr<ExprNot>(std::move(first)); +} + +Expr MakeExprAnd(Expr first, Expr second) { + if (ExprIsBoolean(first)) { + return ExprBooleanGet(first) ? second : first; + } + if (ExprIsBoolean(second)) { + return ExprBooleanGet(second) ? first : second; + } + return MakeExpr<ExprAnd>(std::move(first), std::move(second)); +} + +Expr MakeExprOr(Expr first, Expr second) { + if (ExprIsBoolean(first)) { + return ExprBooleanGet(first) ? first : second; + } + if (ExprIsBoolean(second)) { + return ExprBooleanGet(second) ? 
second : first; + } + return MakeExpr<ExprOr>(std::move(first), std::move(second)); +} + +bool ExprAreEqual(const Expr& first, const Expr& second) { + return (*first) == (*second); +} + +bool ExprAreOpposite(const Expr& first, const Expr& second) { + if (std::holds_alternative<ExprNot>(*first)) { + return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); + } + if (std::holds_alternative<ExprNot>(*second)) { + return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); + } + return false; +} + +bool ExprIsTrue(const Expr& first) { + if (ExprIsBoolean(first)) { + return ExprBooleanGet(first); + } + return false; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h new file mode 100644 index 000000000..4e8264367 --- /dev/null +++ b/src/video_core/shader/expr.h @@ -0,0 +1,156 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <memory> +#include <variant> + +#include "video_core/engines/shader_bytecode.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::ConditionCode; +using Tegra::Shader::Pred; + +class ExprAnd; +class ExprBoolean; +class ExprCondCode; +class ExprGprEqual; +class ExprNot; +class ExprOr; +class ExprPredicate; +class ExprVar; + +using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, + ExprBoolean, ExprGprEqual>; +using Expr = std::shared_ptr<ExprData>; + +class ExprAnd final { +public: + explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} + + bool operator==(const ExprAnd& b) const; + bool operator!=(const ExprAnd& b) const; + + Expr operand1; + Expr operand2; +}; + +class ExprOr final { +public: + explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} + + bool operator==(const ExprOr& b) const; + bool operator!=(const ExprOr& b) const; + + Expr operand1; + Expr operand2; +}; + +class ExprNot final { +public: + explicit ExprNot(Expr a) : operand1{std::move(a)} {} + + bool operator==(const ExprNot& b) const; + bool operator!=(const ExprNot& b) const; + + Expr operand1; +}; + +class ExprVar final { +public: + explicit ExprVar(u32 index) : var_index{index} {} + + bool operator==(const ExprVar& b) const { + return var_index == b.var_index; + } + + bool operator!=(const ExprVar& b) const { + return !operator==(b); + } + + u32 var_index; +}; + +class ExprPredicate final { +public: + explicit ExprPredicate(u32 predicate) : predicate{predicate} {} + + bool operator==(const ExprPredicate& b) const { + return predicate == b.predicate; + } + + bool operator!=(const ExprPredicate& b) const { + return !operator==(b); + } + + u32 predicate; +}; + +class ExprCondCode final { +public: + explicit ExprCondCode(ConditionCode cc) : cc{cc} {} + + bool operator==(const ExprCondCode& b) const { + return cc == b.cc; + } + + bool operator!=(const ExprCondCode& 
b) const { + return !operator==(b); + } + + ConditionCode cc; +}; + +class ExprBoolean final { +public: + explicit ExprBoolean(bool val) : value{val} {} + + bool operator==(const ExprBoolean& b) const { + return value == b.value; + } + + bool operator!=(const ExprBoolean& b) const { + return !operator==(b); + } + + bool value; +}; + +class ExprGprEqual final { +public: + ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} + + bool operator==(const ExprGprEqual& b) const { + return gpr == b.gpr && value == b.value; + } + + bool operator!=(const ExprGprEqual& b) const { + return !operator==(b); + } + + u32 gpr; + u32 value; +}; + +template <typename T, typename... Args> +Expr MakeExpr(Args&&... args) { + static_assert(std::is_convertible_v<T, ExprData>); + return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); +} + +bool ExprAreEqual(const Expr& first, const Expr& second); + +bool ExprAreOpposite(const Expr& first, const Expr& second); + +Expr MakeExprNot(Expr first); + +Expr MakeExprAnd(Expr first, Expr second); + +Expr MakeExprOr(Expr first, Expr second); + +bool ExprIsTrue(const Expr& first); + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 338bab17c..54217e6a4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -47,6 +47,7 @@ enum class OperationCode { FTrunc, /// (MetaArithmetic, float a) -> float FCastInteger, /// (MetaArithmetic, int a) -> float FCastUInteger, /// (MetaArithmetic, uint a) -> float + FSwizzleAdd, /// (float a, float b, uint mask) -> float IAdd, /// (MetaArithmetic, int a, int b) -> int IMul, /// (MetaArithmetic, int a, int b) -> int @@ -181,15 +182,8 @@ enum class OperationCode { VoteAny, /// (bool) -> bool VoteEqual, /// (bool) -> bool - ShuffleIndexed, /// (uint value, uint index, uint width) -> uint - ShuffleUp, /// (uint value, uint index, uint width) -> uint - ShuffleDown, /// (uint value, uint index, uint width) -> uint - 
ShuffleButterfly, /// (uint value, uint index, uint width) -> uint - - InRangeShuffleIndexed, /// (uint index, uint width) -> bool - InRangeShuffleUp, /// (uint index, uint width) -> bool - InRangeShuffleDown, /// (uint index, uint width) -> bool - InRangeShuffleButterfly, /// (uint index, uint width) -> bool + ThreadId, /// () -> uint + ShuffleIndexed, /// (uint value, uint index) -> uint Amount, }; @@ -230,62 +224,49 @@ using NodeBlock = std::vector<Node>; class Sampler { public: /// This constructor is for bound samplers - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{false} {} + constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow) + : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} /// This constructor is for bindless samplers - explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} - - /// This constructor is for serialization/deserialization - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_bindless) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{is_bindless} {} - - std::size_t GetOffset() const { + constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow) + : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, + is_shadow{is_shadow}, is_bindless{true} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr 
u32 GetOffset() const { return offset; } - std::size_t GetIndex() const { - return index; + constexpr u32 GetBuffer() const { + return buffer; } - Tegra::Shader::TextureType GetType() const { + constexpr Tegra::Shader::TextureType GetType() const { return type; } - bool IsArray() const { + constexpr bool IsArray() const { return is_array; } - bool IsShadow() const { + constexpr bool IsShadow() const { return is_shadow; } - bool IsBindless() const { + constexpr bool IsBindless() const { return is_bindless; } - std::pair<u32, u32> GetBindlessCBuf() const { - return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; - } - - bool operator<(const Sampler& rhs) const { - return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < - std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, - rhs.is_bindless); - } - private: - /// Offset in TSC memory from which to read the sampler object, as specified by the sampling - /// instruction. - std::size_t offset{}; - std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. + u32 index{}; ///< Emulated index given for this sampler. + u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. + u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). + Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
@@ -294,18 +275,13 @@ private: class Image final { public: - constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) - : offset{offset}, index{index}, type{type}, is_bindless{false} {} + /// This constructor is for bound images + constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type) + : index{index}, offset{offset}, type{type} {} - constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::ImageType type) - : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_bindless{true} {} - - constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, - bool is_bindless, bool is_written, bool is_read, bool is_atomic) - : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, - is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {} + /// This constructor is for bindless images + constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type) + : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {} void MarkWrite() { is_written = true; @@ -321,12 +297,16 @@ public: is_atomic = true; } - constexpr std::size_t GetOffset() const { + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetOffset() const { return offset; } - constexpr std::size_t GetIndex() const { - return index; + constexpr u32 GetBuffer() const { + return buffer; } constexpr Tegra::Shader::ImageType GetType() const { @@ -349,18 +329,11 @@ public: return is_atomic; } - constexpr std::pair<u32, u32> GetBindlessCBuf() const { - return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; - } - - constexpr bool operator<(const Image& rhs) const { - return std::tie(offset, index, type, is_bindless) < - std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); - } - private: - u64 offset{}; - std::size_t index{}; + u32 index{}; + u32 offset{}; +
u32 buffer{}; + Tegra::Shader::ImageType type{}; bool is_bindless{}; bool is_written{}; @@ -410,7 +383,7 @@ public: explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} explicit OperationNode(OperationCode code, Meta meta) - : OperationNode(code, meta, std::vector<Node>{}) {} + : OperationNode(code, std::move(meta), std::vector<Node>{}) {} explicit OperationNode(OperationCode code, std::vector<Node> operands) : OperationNode(code, Meta{}, std::move(operands)) {} diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 2c357f310..1d9825c76 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -2,8 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <array> #include <cmath> -#include <unordered_map> #include "common/assert.h" #include "common/common_types.h" @@ -22,8 +23,9 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) - : program_code{program_code}, main_offset{main_offset}, program_size{size} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); } @@ -137,7 +139,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); } -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { +Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { const Node node = MakeNode<InternalFlagNode>(flag); if (negated) { return Operation(OperationCode::LogicalNegate, node); @@ -269,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) 
{ } Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::LogicalFLessThan}, - {PredCondition::Equal, OperationCode::LogicalFEqual}, - {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, - {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, - {PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, - {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, - {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, - {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, - {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, - {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan}, + std::pair{PredCondition::Equal, OperationCode::LogicalFEqual}, + std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, + std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}, + }; + + const auto comparison = + 
std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); @@ -304,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::LogicalILessThan}, - {PredCondition::Equal, OperationCode::LogicalIEqual}, - {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, - {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, - {PredCondition::NotEqual, OperationCode::LogicalINotEqual}, - {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, - {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, - {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, - {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, - {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan}, + std::pair{PredCondition::Equal, OperationCode::LogicalIEqual}, + std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, + 
std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}, + }; + + const auto comparison = + std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), @@ -335,45 +343,52 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::Logical2HLessThan}, - {PredCondition::Equal, OperationCode::Logical2HEqual}, - {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, - {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, - {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, - {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, - {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, - {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, - {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, - {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + 
static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan}, + std::pair{PredCondition::Equal, OperationCode::Logical2HEqual}, + std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, + std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}, + }; + + const auto comparison = + std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); } OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { - {PredOperation::And, OperationCode::LogicalAnd}, - {PredOperation::Or, OperationCode::LogicalOr}, - {PredOperation::Xor, OperationCode::LogicalXor}, + static constexpr std::array operation_table{ + OperationCode::LogicalAnd, + OperationCode::LogicalOr, + OperationCode::LogicalXor, }; - const auto op = PredicateOperationTable.find(operation); - UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation"); - return op->second; + const auto index = static_cast<std::size_t>(operation); + if (index >= 
operation_table.size()) { + UNIMPLEMENTED_MSG("Unknown predicate operation."); + return {}; + } + + return operation_table[index]; } -Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { +Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { switch (cc) { case Tegra::Shader::ConditionCode::NEU: return GetInternalFlag(InternalFlag::Zero, true); default: UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); - return GetPredicate(static_cast<u64>(Pred::NeverExecute)); + return MakeNode<PredicateNode>(Pred::NeverExecute, false); } } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6f666ee30..76a849818 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <list> #include <map> #include <optional> #include <set> @@ -15,6 +16,9 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/ast.h" +#include "video_core/shader/compiler_settings.h" +#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/node.h" namespace VideoCommon::Shader { @@ -45,7 +49,7 @@ public: } u32 GetSize() const { - return max_offset + sizeof(float); + return max_offset + static_cast<u32>(sizeof(float)); } u32 GetMaxOffset() const { @@ -64,7 +68,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker); ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { @@ -91,11 +96,11 @@ public: return used_cbufs; } - const std::set<Sampler>& GetSamplers() const { + const std::list<Sampler>& GetSamplers() const { return used_samplers; } - const std::map<u64, Image>& 
GetImages() const { + const std::list<Image>& GetImages() const { return used_images; } @@ -144,11 +149,38 @@ public: return disable_flow_stack; } - u32 ConvertAddressToNvidiaSpace(const u32 address) const { - return (address - main_offset) * sizeof(Tegra::Shader::Instruction); + bool IsDecompiled() const { + return decompiled; } + const ASTManager& GetASTManager() const { + return program_manager; + } + + ASTNode GetASTProgram() const { + return program_manager.GetProgram(); + } + + u32 GetASTNumVariables() const { + return program_manager.GetVariables(); + } + + u32 ConvertAddressToNvidiaSpace(u32 address) const { + return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); + } + + /// Returns a condition code evaluated from internal flags + Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; + private: + friend class ASTDecoder; + + struct SamplerInfo { + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + }; + void Decode(); NodeBlock DecodeRange(u32 begin, u32 end); @@ -213,7 +245,7 @@ private: /// Generates a node representing an output attribute. Keeps track of used attributes. 
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false); + Node GetInternalFlag(InternalFlag flag, bool negated = false) const; /// Generates a node representing a local memory address Node GetLocalMemory(Node address); /// Generates a node representing a shared memory address @@ -271,17 +303,13 @@ private: /// Returns a predicate combiner operation OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - /// Returns a condition code evaluated from internal flags - Node GetConditionCode(Tegra::Shader::ConditionCode cc); - /// Accesses a texture sampler const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + std::optional<SamplerInfo> sampler_info); // Accesses a texture sampler for a bindless texture. const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, - Tegra::Shader::TextureType type, bool is_array, - bool is_shadow); + std::optional<SamplerInfo> sampler_info); /// Accesses an image. Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); @@ -289,9 +317,6 @@ private: /// Access a bindless image sampler. 
Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - /// Tries to access an existing image, updating it's state as needed - Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type); - /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -302,7 +327,7 @@ private: const Node4& components); void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); + const Node4& components, bool ignore_mask = false); void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); @@ -316,7 +341,7 @@ private: bool is_array); Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi); + bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless); Node4 GetTldCode(Tegra::Shader::Instruction instr); @@ -351,12 +376,16 @@ private: std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) const; - std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( - NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); + std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, + Tegra::Shader::Instruction instr, + bool is_write); const ProgramCode& program_code; const u32 main_offset; - const std::size_t program_size; + const CompilerSettings settings; + ConstBufferLocker& locker; + + bool decompiled{}; bool disable_flow_stack{}; u32 coverage_begin{}; @@ -364,14 +393,15 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; + ASTManager program_manager{true, true}; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; std::set<Tegra::Shader::Attribute::Index> used_input_attributes; std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; - std::set<Sampler> 
used_samplers; - std::map<u64, Image> used_images; + std::list<Sampler> used_samplers; + std::list<Image> used_images; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; bool uses_layer{}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 250afc6d6..1655ccf16 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -168,282 +168,6 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) } } -PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, - Tegra::Texture::ComponentType component_type, - bool is_srgb) { - // TODO(Subv): Properly implement this - switch (format) { - case Tegra::Texture::TextureFormat::A8R8G8B8: - if (is_srgb) { - return PixelFormat::RGBA8_SRGB; - } - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::ABGR8U; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::ABGR8S; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::ABGR8UI; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::B5G6R5: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::B5G6R5U; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::A2B10G10R10: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::A2B10G10R10U; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::A1B5G5R5: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::A1B5G5R5U; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::R8: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R8U; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::R8UI; - default: - break; - } - break; - case 
Tegra::Texture::TextureFormat::G8R8: - // TextureFormat::G8R8 is actually ordered red then green, as such we can use - // PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath - // of the Wild, which uses this format to render the hearts on the UI. - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::RG8U; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::RG8S; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::R16_G16_B16_A16: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::RGBA16U; - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RGBA16F; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::BF10GF11RF11: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::R11FG11FB10F; - default: - break; - } - case Tegra::Texture::TextureFormat::R32_G32_B32_A32: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RGBA32F; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::RGBA32UI; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::R32_G32: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RG32F; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::RG32UI; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::R32_G32_B32: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RGB32F; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::R16: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::R16F; - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R16U; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::R16S; - case Tegra::Texture::ComponentType::UINT: - return 
PixelFormat::R16UI; - case Tegra::Texture::ComponentType::SINT: - return PixelFormat::R16I; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::R32: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::R32F; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::R32UI; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::ZF32: - return PixelFormat::Z32F; - case Tegra::Texture::TextureFormat::Z16: - return PixelFormat::Z16; - case Tegra::Texture::TextureFormat::S8Z24: - return PixelFormat::S8Z24; - case Tegra::Texture::TextureFormat::ZF32_X24S8: - return PixelFormat::Z32FS8; - case Tegra::Texture::TextureFormat::DXT1: - return is_srgb ? PixelFormat::DXT1_SRGB : PixelFormat::DXT1; - case Tegra::Texture::TextureFormat::DXT23: - return is_srgb ? PixelFormat::DXT23_SRGB : PixelFormat::DXT23; - case Tegra::Texture::TextureFormat::DXT45: - return is_srgb ? PixelFormat::DXT45_SRGB : PixelFormat::DXT45; - case Tegra::Texture::TextureFormat::DXN1: - return PixelFormat::DXN1; - case Tegra::Texture::TextureFormat::DXN2: - switch (component_type) { - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::DXN2UNORM; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::DXN2SNORM; - default: - break; - } - break; - case Tegra::Texture::TextureFormat::BC7U: - return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U; - case Tegra::Texture::TextureFormat::BC6H_UF16: - return PixelFormat::BC6H_UF16; - case Tegra::Texture::TextureFormat::BC6H_SF16: - return PixelFormat::BC6H_SF16; - case Tegra::Texture::TextureFormat::ASTC_2D_4X4: - return is_srgb ? PixelFormat::ASTC_2D_4X4_SRGB : PixelFormat::ASTC_2D_4X4; - case Tegra::Texture::TextureFormat::ASTC_2D_5X4: - return is_srgb ? PixelFormat::ASTC_2D_5X4_SRGB : PixelFormat::ASTC_2D_5X4; - case Tegra::Texture::TextureFormat::ASTC_2D_5X5: - return is_srgb ? 
PixelFormat::ASTC_2D_5X5_SRGB : PixelFormat::ASTC_2D_5X5; - case Tegra::Texture::TextureFormat::ASTC_2D_8X8: - return is_srgb ? PixelFormat::ASTC_2D_8X8_SRGB : PixelFormat::ASTC_2D_8X8; - case Tegra::Texture::TextureFormat::ASTC_2D_8X5: - return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5; - case Tegra::Texture::TextureFormat::ASTC_2D_10X8: - return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8; - case Tegra::Texture::TextureFormat::R16_G16: - switch (component_type) { - case Tegra::Texture::ComponentType::FLOAT: - return PixelFormat::RG16F; - case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::RG16; - case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::RG16S; - case Tegra::Texture::ComponentType::UINT: - return PixelFormat::RG16UI; - case Tegra::Texture::ComponentType::SINT: - return PixelFormat::RG16I; - default: - break; - } - break; - default: - break; - } - LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format), - static_cast<u32>(component_type)); - UNREACHABLE(); - return PixelFormat::ABGR8U; -} - -ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { - // TODO(Subv): Implement more component types - switch (type) { - case Tegra::Texture::ComponentType::UNORM: - return ComponentType::UNorm; - case Tegra::Texture::ComponentType::FLOAT: - return ComponentType::Float; - case Tegra::Texture::ComponentType::SNORM: - return ComponentType::SNorm; - case Tegra::Texture::ComponentType::UINT: - return ComponentType::UInt; - case Tegra::Texture::ComponentType::SINT: - return ComponentType::SInt; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type)); - UNREACHABLE(); - return ComponentType::UNorm; - } -} - -ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) { - // TODO(Subv): Implement more render targets - switch (format) { - case Tegra::RenderTargetFormat::RGBA8_UNORM: - 
case Tegra::RenderTargetFormat::RGBA8_SRGB: - case Tegra::RenderTargetFormat::BGRA8_UNORM: - case Tegra::RenderTargetFormat::BGRA8_SRGB: - case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - case Tegra::RenderTargetFormat::R8_UNORM: - case Tegra::RenderTargetFormat::RG16_UNORM: - case Tegra::RenderTargetFormat::R16_UNORM: - case Tegra::RenderTargetFormat::B5G6R5_UNORM: - case Tegra::RenderTargetFormat::BGR5A1_UNORM: - case Tegra::RenderTargetFormat::RG8_UNORM: - case Tegra::RenderTargetFormat::RGBA16_UNORM: - return ComponentType::UNorm; - case Tegra::RenderTargetFormat::RGBA8_SNORM: - case Tegra::RenderTargetFormat::RG16_SNORM: - case Tegra::RenderTargetFormat::R16_SNORM: - case Tegra::RenderTargetFormat::RG8_SNORM: - return ComponentType::SNorm; - case Tegra::RenderTargetFormat::RGBA16_FLOAT: - case Tegra::RenderTargetFormat::RGBX16_FLOAT: - case Tegra::RenderTargetFormat::R11G11B10_FLOAT: - case Tegra::RenderTargetFormat::RGBA32_FLOAT: - case Tegra::RenderTargetFormat::RG32_FLOAT: - case Tegra::RenderTargetFormat::RG16_FLOAT: - case Tegra::RenderTargetFormat::R16_FLOAT: - case Tegra::RenderTargetFormat::R32_FLOAT: - return ComponentType::Float; - case Tegra::RenderTargetFormat::RGBA32_UINT: - case Tegra::RenderTargetFormat::RGBA16_UINT: - case Tegra::RenderTargetFormat::RG16_UINT: - case Tegra::RenderTargetFormat::R8_UINT: - case Tegra::RenderTargetFormat::R16_UINT: - case Tegra::RenderTargetFormat::RG32_UINT: - case Tegra::RenderTargetFormat::R32_UINT: - case Tegra::RenderTargetFormat::RGBA8_UINT: - return ComponentType::UInt; - case Tegra::RenderTargetFormat::RG16_SINT: - case Tegra::RenderTargetFormat::R16_SINT: - return ComponentType::SInt; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - return ComponentType::UNorm; - } -} - PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: @@ -458,22 +182,6 @@ 
PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat } } -ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) { - switch (format) { - case Tegra::DepthFormat::Z16_UNORM: - case Tegra::DepthFormat::S8_Z24_UNORM: - case Tegra::DepthFormat::Z24_S8_UNORM: - return ComponentType::UNorm; - case Tegra::DepthFormat::Z32_FLOAT: - case Tegra::DepthFormat::Z32_S8_X24_FLOAT: - return ComponentType::Float; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - return ComponentType::UNorm; - } -} - SurfaceType GetFormatType(PixelFormat pixel_format) { if (static_cast<std::size_t>(pixel_format) < static_cast<std::size_t>(PixelFormat::MaxColorFormat)) { @@ -510,6 +218,16 @@ bool IsPixelFormatASTC(PixelFormat format) { case PixelFormat::ASTC_2D_8X5_SRGB: case PixelFormat::ASTC_2D_10X8: case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X10: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X12: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5: + case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: return false; @@ -530,6 +248,11 @@ bool IsPixelFormatSRGB(PixelFormat format) { case PixelFormat::ASTC_2D_5X4_SRGB: case PixelFormat::ASTC_2D_5X5_SRGB: case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: return false; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 1e1c432a5..0d17a93ed 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -67,45 +67,47 @@ enum class PixelFormat { DXT23_SRGB = 49, DXT45_SRGB = 50, BC7U_SRGB = 51, - ASTC_2D_4X4_SRGB = 52, 
- ASTC_2D_8X8_SRGB = 53, - ASTC_2D_8X5_SRGB = 54, - ASTC_2D_5X4_SRGB = 55, - ASTC_2D_5X5 = 56, - ASTC_2D_5X5_SRGB = 57, - ASTC_2D_10X8 = 58, - ASTC_2D_10X8_SRGB = 59, + R4G4B4A4U = 52, + ASTC_2D_4X4_SRGB = 53, + ASTC_2D_8X8_SRGB = 54, + ASTC_2D_8X5_SRGB = 55, + ASTC_2D_5X4_SRGB = 56, + ASTC_2D_5X5 = 57, + ASTC_2D_5X5_SRGB = 58, + ASTC_2D_10X8 = 59, + ASTC_2D_10X8_SRGB = 60, + ASTC_2D_6X6 = 61, + ASTC_2D_6X6_SRGB = 62, + ASTC_2D_10X10 = 63, + ASTC_2D_10X10_SRGB = 64, + ASTC_2D_12X12 = 65, + ASTC_2D_12X12_SRGB = 66, + ASTC_2D_8X6 = 67, + ASTC_2D_8X6_SRGB = 68, + ASTC_2D_6X5 = 69, + ASTC_2D_6X5_SRGB = 70, + E5B9G9R9F = 71, MaxColorFormat, // Depth formats - Z32F = 60, - Z16 = 61, + Z32F = 72, + Z16 = 73, MaxDepthFormat, // DepthStencil formats - Z24S8 = 62, - S8Z24 = 63, - Z32FS8 = 64, + Z24S8 = 74, + S8Z24 = 75, + Z32FS8 = 76, MaxDepthStencilFormat, Max = MaxDepthStencilFormat, Invalid = 255, }; - static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); -enum class ComponentType { - Invalid = 0, - SNorm = 1, - UNorm = 2, - SInt = 3, - UInt = 4, - Float = 5, -}; - enum class SurfaceType { ColorTexture = 0, Depth = 1, @@ -177,6 +179,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 2, // DXT23_SRGB 2, // DXT45_SRGB 2, // BC7U_SRGB + 0, // R4G4B4A4U 2, // ASTC_2D_4X4_SRGB 2, // ASTC_2D_8X8_SRGB 2, // ASTC_2D_8X5_SRGB @@ -185,6 +188,17 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 2, // ASTC_2D_5X5_SRGB 2, // ASTC_2D_10X8 2, // ASTC_2D_10X8_SRGB + 2, // ASTC_2D_6X6 + 2, // ASTC_2D_6X6_SRGB + 2, // ASTC_2D_10X10 + 2, // ASTC_2D_10X10_SRGB + 2, // ASTC_2D_12X12 + 2, // ASTC_2D_12X12_SRGB + 2, // ASTC_2D_8X6 + 2, // ASTC_2D_8X6_SRGB + 2, // ASTC_2D_6X5 + 2, // ASTC_2D_6X5_SRGB + 0, // E5B9G9R9F 0, // Z32F 0, // Z16 0, // Z24S8 @@ -261,6 +275,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 4, // DXT23_SRGB 4, // DXT45_SRGB 4, // BC7U_SRGB + 1, // 
R4G4B4A4U 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 8, // ASTC_2D_8X5_SRGB @@ -269,6 +284,17 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 5, // ASTC_2D_5X5_SRGB 10, // ASTC_2D_10X8 10, // ASTC_2D_10X8_SRGB + 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_SRGB + 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_SRGB + 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_SRGB + 8, // ASTC_2D_8X6 + 8, // ASTC_2D_8X6_SRGB + 6, // ASTC_2D_6X5 + 6, // ASTC_2D_6X5_SRGB + 1, // E5B9G9R9F 1, // Z32F 1, // Z16 1, // Z24S8 @@ -285,71 +311,83 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 8, // ASTC_2D_8X8 - 5, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 8, // ASTC_2D_8X8_SRGB - 5, // ASTC_2D_8X5_SRGB - 4, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 - 5, // ASTC_2D_5X5_SRGB - 8, // ASTC_2D_10X8 - 8, // ASTC_2D_10X8_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // ABGR8U + 1, // ABGR8S + 1, // ABGR8UI + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U + 1, // R8UI + 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI + 1, // R11FG11FB10F + 1, // RGBA32UI + 4, // DXT1 + 4, // DXT23 + 
4, // DXT45 + 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM + 4, // BC7U + 4, // BC6H_UF16 + 4, // BC6H_SF16 + 4, // ASTC_2D_4X4 + 1, // BGRA8 + 1, // RGBA32F + 1, // RG32F + 1, // R32F + 1, // R16F + 1, // R16U + 1, // R16S + 1, // R16UI + 1, // R16I + 1, // RG16 + 1, // RG16F + 1, // RG16UI + 1, // RG16I + 1, // RG16S + 1, // RGB32F + 1, // RGBA8_SRGB + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // RGBX16F + 1, // R32UI + 8, // ASTC_2D_8X8 + 5, // ASTC_2D_8X5 + 4, // ASTC_2D_5X4 + 1, // BGRA8_SRGB + 4, // DXT1_SRGB + 4, // DXT23_SRGB + 4, // DXT45_SRGB + 4, // BC7U_SRGB + 1, // R4G4B4A4U + 4, // ASTC_2D_4X4_SRGB + 8, // ASTC_2D_8X8_SRGB + 5, // ASTC_2D_8X5_SRGB + 4, // ASTC_2D_5X4_SRGB + 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_SRGB + 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_SRGB + 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_SRGB + 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_SRGB + 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_SRGB + 6, // ASTC_2D_8X6 + 6, // ASTC_2D_8X6_SRGB + 5, // ASTC_2D_6X5 + 5, // ASTC_2D_6X5_SRGB + 1, // E5B9G9R9F + 1, // Z32F + 1, // Z16 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32FS8 }}; static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { @@ -413,6 +451,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 128, // DXT23_SRGB 128, // DXT45_SRGB 128, // BC7U + 16, // R4G4B4A4U 128, // ASTC_2D_4X4_SRGB 128, // ASTC_2D_8X8_SRGB 128, // ASTC_2D_8X5_SRGB @@ -421,6 +460,17 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 128, // ASTC_2D_5X5_SRGB 128, // ASTC_2D_10X8 128, // ASTC_2D_10X8_SRGB + 128, // ASTC_2D_6X6 + 128, // ASTC_2D_6X6_SRGB + 128, // ASTC_2D_10X10 + 128, // ASTC_2D_10X10_SRGB + 128, // ASTC_2D_12X12 + 128, // ASTC_2D_12X12_SRGB + 128, // ASTC_2D_8X6 + 128, // ASTC_2D_8X6_SRGB + 128, // ASTC_2D_6X5 + 128, // ASTC_2D_6X5_SRGB + 32, // E5B9G9R9F 32, // Z32F 16, // Z16 32, // Z24S8 @@ -504,6 +554,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::Compressed, // 
DXT23_SRGB SurfaceCompression::Compressed, // DXT45_SRGB SurfaceCompression::Compressed, // BC7U_SRGB + SurfaceCompression::None, // R4G4B4A4U SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB @@ -512,6 +563,17 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB SurfaceCompression::Converted, // ASTC_2D_10X8 SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB + SurfaceCompression::Converted, // ASTC_2D_6X6 + SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB + SurfaceCompression::Converted, // ASTC_2D_10X10 + SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB + SurfaceCompression::Converted, // ASTC_2D_12X12 + SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X6 + SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB + SurfaceCompression::Converted, // ASTC_2D_6X5 + SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB + SurfaceCompression::None, // E5B9G9R9F SurfaceCompression::None, // Z32F SurfaceCompression::None, // Z16 SurfaceCompression::None, // Z24S8 @@ -537,18 +599,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format); PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format); -PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, - Tegra::Texture::ComponentType component_type, - bool is_srgb); - -ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type); - -ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format); - PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format); -ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format); - SurfaceType GetFormatType(PixelFormat pixel_format); bool IsPixelFormatASTC(PixelFormat format); diff --git 
a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp new file mode 100644 index 000000000..271e67533 --- /dev/null +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -0,0 +1,208 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/texture_cache/format_lookup_table.h" + +namespace VideoCommon { + +using Tegra::Texture::ComponentType; +using Tegra::Texture::TextureFormat; +using VideoCore::Surface::PixelFormat; + +namespace { + +constexpr auto SNORM = ComponentType::SNORM; +constexpr auto UNORM = ComponentType::UNORM; +constexpr auto SINT = ComponentType::SINT; +constexpr auto UINT = ComponentType::UINT; +constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16; +constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16; +constexpr auto FLOAT = ComponentType::FLOAT; +constexpr bool C = false; // Normal color +constexpr bool S = true; // Srgb + +struct Table { + constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component, + ComponentType green_component, ComponentType blue_component, + ComponentType alpha_component, PixelFormat pixel_format) + : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component}, + green_component{green_component}, blue_component{blue_component}, + alpha_component{alpha_component}, is_srgb{is_srgb} {} + + TextureFormat texture_format; + PixelFormat pixel_format; + ComponentType red_component; + ComponentType green_component; + ComponentType blue_component; + ComponentType alpha_component; + bool is_srgb; +}; +constexpr std::array<Table, 74> DefinitionTable = {{ + {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, + {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, 
PixelFormat::ABGR8S}, + {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, + {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB}, + + {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U}, + + {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U}, + + {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U}, + + {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U}, + + {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U}, + {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI}, + + {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, + {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, + + {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, + {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, + {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, + + {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F}, + {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16}, + {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S}, + {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI}, + {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I}, + + {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F}, + {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U}, + {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S}, + {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI}, + {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I}, + + {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F}, + + {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, 
FLOAT, PixelFormat::RGBA32F}, + {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI}, + + {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F}, + + {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F}, + {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI}, + + {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, + {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, + + {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, + + {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, + {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, + {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, + {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8}, + + {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, + {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, + + {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23}, + {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB}, + + {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45}, + {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB}, + + // TODO: Use a different pixel format for SNORM + {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1}, + {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1}, + + {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM}, + {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM}, + + {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U}, + {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB}, + + {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16}, + 
{TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16}, + + {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4}, + {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, + + {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4}, + {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, + + {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5}, + {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, + + {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8}, + {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, + + {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5}, + {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, + + {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8}, + {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, + + {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6}, + {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, + + {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10}, + {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, + + {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12}, + {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, + + {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6}, + {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, + + {TextureFormat::ASTC_2D_6X5, C, UNORM, 
UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5}, + {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, +}}; + +} // Anonymous namespace + +FormatLookupTable::FormatLookupTable() { + table.fill(static_cast<u8>(PixelFormat::Invalid)); + + for (const auto& entry : DefinitionTable) { + table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, + entry.green_component, entry.blue_component, entry.alpha_component)] = + static_cast<u8>(entry.pixel_format); + } +} + +PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, + ComponentType red_component, + ComponentType green_component, + ComponentType blue_component, + ComponentType alpha_component) const noexcept { + const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( + format, is_srgb, red_component, green_component, blue_component, alpha_component)]); + // [[likely]] + if (pixel_format != PixelFormat::Invalid) { + return pixel_format; + } + UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", + static_cast<int>(format), is_srgb, static_cast<int>(red_component), + static_cast<int>(green_component), static_cast<int>(blue_component), + static_cast<int>(alpha_component)); + return PixelFormat::ABGR8U; +} + +void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, + ComponentType green_component, ComponentType blue_component, + ComponentType alpha_component, PixelFormat pixel_format) {} + +std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb, + ComponentType red_component, + ComponentType green_component, + ComponentType blue_component, + ComponentType alpha_component) noexcept { + const auto format_index = static_cast<std::size_t>(format); + const auto red_index = static_cast<std::size_t>(red_component); + const auto green_index = static_cast<std::size_t>(green_component); + const auto blue_index =
static_cast<std::size_t>(blue_component); + const auto alpha_index = static_cast<std::size_t>(alpha_component); + const std::size_t srgb_index = is_srgb ? 1 : 0; + + return format_index * PerFormat + + srgb_index * PerComponent * PerComponent * PerComponent * PerComponent + + alpha_index * PerComponent * PerComponent * PerComponent + + blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index; // mixed-radix slot: red, green, blue, alpha are successive base-PerComponent digits, then the sRGB bit, then the format +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h new file mode 100644 index 000000000..aa77e0a5a --- /dev/null +++ b/src/video_core/texture_cache/format_lookup_table.h @@ -0,0 +1,51 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <limits> +#include "video_core/surface.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +class FormatLookupTable { +public: + explicit FormatLookupTable(); + + VideoCore::Surface::PixelFormat GetPixelFormat( + Tegra::Texture::TextureFormat format, bool is_srgb, + Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component, + Tegra::Texture::ComponentType blue_component, + Tegra::Texture::ComponentType alpha_component) const noexcept; + +private: + static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max()); + + static constexpr std::size_t NumTextureFormats = 128; + + static constexpr std::size_t PerComponent = 8; + static constexpr std::size_t PerComponents2 = PerComponent * PerComponent; + static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent; + static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent; + static constexpr std::size_t PerFormat = PerComponents4 * 2; + + static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
Tegra::Texture::ComponentType red_component, + Tegra::Texture::ComponentType green_component, + Tegra::Texture::ComponentType blue_component, + Tegra::Texture::ComponentType alpha_component) noexcept; + + void Set(Tegra::Texture::TextureFormat format, bool is_srgb, + Tegra::Texture::ComponentType red_component, + Tegra::Texture::ComponentType green_component, + Tegra::Texture::ComponentType blue_component, + Tegra::Texture::ComponentType alpha_component, + VideoCore::Surface::PixelFormat pixel_format); + + std::array<u8, NumTextureFormats * PerFormat> table; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 683c49207..829268b4c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/algorithm.h" #include "common/assert.h" #include "common/common_types.h" #include "common/microprofile.h" diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5e497e49f..1bed82898 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -4,12 +4,11 @@ #pragma once -#include <algorithm> +#include <optional> +#include <tuple> #include <unordered_map> #include <vector> -#include "common/assert.h" -#include "common/binary_find.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 1e4d3fb79..858e17e08 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -2,24 +2,23 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <map> +#include <algorithm> +#include <string> +#include <tuple> #include "common/alignment.h" #include "common/bit_util.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" +#include "video_core/texture_cache/format_lookup_table.h" #include "video_core/texture_cache/surface_params.h" namespace VideoCommon { -using VideoCore::Surface::ComponentTypeFromDepthFormat; -using VideoCore::Surface::ComponentTypeFromRenderTarget; -using VideoCore::Surface::ComponentTypeFromTexture; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::PixelFormatFromTextureFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; using VideoCore::Surface::SurfaceType; @@ -69,7 +68,8 @@ constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { } // Anonymous namespace -SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& tic, +SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table, + const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Sampler& entry) { SurfaceParams params; params.is_tiled = tic.IsTiled(); @@ -78,8 +78,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti params.block_height = params.is_tiled ? tic.BlockHeight() : 0, params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, params.tile_width_spacing = params.is_tiled ? 
(1 << tic.tile_width_spacing.Value()) : 1; - params.pixel_format = - PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion); + params.pixel_format = lookup_table.GetPixelFormat( + tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); params.type = GetFormatType(params.pixel_format); if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { @@ -99,7 +99,6 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti } params.type = GetFormatType(params.pixel_format); } - params.component_type = ComponentTypeFromTexture(tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. if (tic.IsBuffer()) { @@ -128,7 +127,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti return params; } -SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic, +SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table, + const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Image& entry) { SurfaceParams params; params.is_tiled = tic.IsTiled(); @@ -137,10 +137,9 @@ SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic, params.block_height = params.is_tiled ? tic.BlockHeight() : 0, params.block_depth = params.is_tiled ? tic.BlockDepth() : 0, params.tile_width_spacing = params.is_tiled ? 
(1 << tic.tile_width_spacing.Value()) : 1; - params.pixel_format = - PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion); + params.pixel_format = lookup_table.GetPixelFormat( + tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); params.type = GetFormatType(params.pixel_format); - params.component_type = ComponentTypeFromTexture(tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); params.target = ImageTypeToSurfaceTarget(entry.GetType()); // TODO: on 1DBuffer we should use the tic info. @@ -181,7 +180,6 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.block_depth = std::min(block_depth, 5U); params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromDepthFormat(format); - params.component_type = ComponentTypeFromDepthFormat(format); params.type = GetFormatType(params.pixel_format); params.width = zeta_width; params.height = zeta_height; @@ -206,7 +204,6 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.block_depth = config.memory_layout.block_depth; params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); if (params.is_tiled) { params.pitch = 0; @@ -236,7 +233,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.block_depth = params.is_tiled ? 
std::min(config.BlockDepth(), 5U) : 0, params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; params.height = config.height; @@ -355,10 +351,10 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, - height, depth, pitch, num_levels, pixel_format, component_type, type, target) == + height, depth, pitch, num_levels, pixel_format, type, target) == std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, - rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target); + rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target); } std::string SurfaceParams::TargetName() const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index c58e7f8a4..709aa0dc2 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -16,16 +16,20 @@ namespace VideoCommon { +class FormatLookupTable; + using VideoCore::Surface::SurfaceCompression; class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. - static SurfaceParams CreateForTexture(const Tegra::Texture::TICEntry& tic, + static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table, + const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Sampler& entry); /// Creates SurfaceCachedParams from an image configuration. 
- static SurfaceParams CreateForImage(const Tegra::Texture::TICEntry& tic, + static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table, + const Tegra::Texture::TICEntry& tic, const VideoCommon::Shader::Image& entry); /// Creates SurfaceCachedParams for a depth buffer configuration. @@ -248,7 +252,6 @@ public: u32 num_levels; u32 emulated_levels; VideoCore::Surface::PixelFormat pixel_format; - VideoCore::Surface::ComponentType component_type; VideoCore::Surface::SurfaceType type; VideoCore::Surface::SurfaceTarget target; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 877c6635d..41309ebea 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -29,6 +29,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" #include "video_core/texture_cache/copy_params.h" +#include "video_core/texture_cache/format_lookup_table.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" @@ -62,10 +63,10 @@ public: } } - /*** - * `Guard` guarantees that rendertargets don't unregister themselves if the + /** + * Guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. 
- ***/ + */ void GuardRenderTargets(bool new_guard) { guard_render_targets = new_guard; } @@ -96,7 +97,7 @@ public: if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForTexture(tic, entry)}; + const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; const auto [surface, view] = GetSurface(gpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); @@ -111,7 +112,7 @@ public: if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForImage(tic, entry)}; + const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; const auto [surface, view] = GetSurface(gpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); @@ -224,8 +225,13 @@ public: const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Tegra::Engines::Fermi2D::Config& copy_config) { std::lock_guard lock{mutex}; - std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config); - std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config); + SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); + SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); + const GPUVAddr src_gpu_addr = src_config.Address(); + const GPUVAddr dst_gpu_addr = dst_config.Address(); + DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); + std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false); + std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false); ImageBlit(src_surface.second, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } @@ -282,7 +288,7 @@ protected: const Tegra::Engines::Fermi2D::Config& copy_config) = 0; // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture - // and reading it from a sepparate buffer. + // and reading it from a separate buffer. 
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void ManageRenderTargetUnregister(TSurface& surface) { @@ -357,13 +363,37 @@ private: BufferCopy = 3, }; + enum class DeductionType : u32 { + DeductionComplete, + DeductionIncomplete, + DeductionFailed, + }; + + struct Deduction { + DeductionType type{DeductionType::DeductionFailed}; + TSurface surface{}; + + bool Failed() const { + return type == DeductionType::DeductionFailed; + } + + bool Incomplete() const { + return type == DeductionType::DeductionIncomplete; + } + + bool IsDepth() const { + return surface->GetSurfaceParams().IsPixelFormatZeta(); + } + }; + /** - * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. - * @param untopological, tells the recycler that the texture has no way to match the overlaps - * due to topological reasons. + * Takes care of selecting a proper strategy to deal with a texture recycle. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. + * @param untopological Indicates to the recycler that the texture has no way + * to match the overlaps due to topological reasons. 
**/ RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { @@ -374,7 +404,7 @@ private: if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; } - for (auto s : overlaps) { + for (const auto& s : overlaps) { const auto& s_params = s->GetSurfaceParams(); if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -391,16 +421,19 @@ private: } /** - * `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in - *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the - *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the - *new surface from that data. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. - * @param preserve_contents, tells if the new surface should be loaded from meory or left blank - * @param untopological, tells the recycler that the texture has no way to match the overlaps - * due to topological reasons. + * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented + * strategies: Ignore and Flush. + * + * - Ignore: Just unregisters all the overlaps and loads the new texture. + * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters for the new surface. + * @param gpu_addr The starting address of the new surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or left + * blank. 
+ * @param untopological Indicates to the recycler that the texture has no way to match the + * overlaps due to topological reasons. **/ std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, @@ -437,10 +470,12 @@ private: } /** - * `RebuildSurface` this method takes a single surface and recreates into another that - * may differ in format, target or width alingment. - * @param current_surface, the registered surface in the cache which we want to convert. - * @param params, the new surface params which we'll use to recreate the surface. + * Takes a single surface and recreates into another that may differ in + * format, target or width alignment. + * + * @param current_surface The registered surface in the cache which we want to convert. + * @param params The new surface params which we'll use to recreate the surface. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, bool is_render) { @@ -451,15 +486,13 @@ private: GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; - new_params.component_type = cr_params.component_type; new_params.type = cr_params.type; new_surface = GetUncachedSurface(gpu_addr, new_params); } else { new_surface = GetUncachedSurface(gpu_addr, params); } const auto& final_params = new_surface->GetSurfaceParams(); - if (cr_params.type != final_params.type || - (cr_params.component_type != final_params.component_type)) { + if (cr_params.type != final_params.type) { BufferCopy(current_surface, new_surface); } else { std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); @@ -474,12 +507,14 @@ private: } /** - * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's - * params if it's an exact match, we 
return the main view of the registered surface. If it's - * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats + * Takes a single surface and checks with the new surface's params if it's an exact + * match, we return the main view of the registered surface. If its formats don't + * match, we rebuild the surface. We call this last method a `Mirage`. If formats * match but the targets don't, we create an overview View of the registered surface. - * @param current_surface, the registered surface in the cache which we want to convert. - * @param params, the new surface params which we want to check. + * + * @param current_surface The registered surface in the cache which we want to convert. + * @param params The new surface params which we want to check. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, const SurfaceParams& params, bool is_render) { @@ -501,13 +536,14 @@ private: } /** - * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface - * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps - * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface - * for them, else we return nothing. - * @param overlaps, the overlapping surfaces registered in the cache. - * @param params, the paremeters on the new surface. - * @param gpu_addr, the starting address of the new surface. + * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate + * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps + * of the new surface, if they all match we end up recreating a surface for them, + * else we return nothing. + * + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. 
+ * @param gpu_addr The starting address of the new surface. **/ std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, const SurfaceParams& params, @@ -547,7 +583,7 @@ private: } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { return {}; } - for (auto surface : overlaps) { + for (const auto& surface : overlaps) { Unregister(surface); } new_surface->MarkAsModified(modified, Tick()); @@ -556,19 +592,27 @@ private: } /** - * `GetSurface` gets the starting address and parameters of a candidate surface and tries - * to find a matching surface within the cache. This is done in 3 big steps. The first is to - * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. - * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from - * memory else we move to step 3. Step 3 consists on figuring the relationship between the - * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many - * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the - * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to - * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface - * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. - * @param gpu_addr, the starting address of the candidate surface. - * @param params, the paremeters on the candidate surface. - * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. + * Gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache. This is done in 3 big steps: + * + * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. + * + * 2. 
Check if there are any overlaps at all, if there are none, we just load the texture from + * memory else we move to step 3. + * + * 3. Consists of figuring out the relationship between the candidate texture and the + * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If + * there's many, we just try to reconstruct a new surface out of them based on the + * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we + * have to check if the candidate is a view (layer/mipmap) of the overlap or if the + * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct + * a new surface. + * + * @param gpu_addr The starting address of the candidate surface. + * @param params The parameters on the candidate surface. + * @param preserve_contents Indicates that the new surface should be loaded from memory or + * left blank. + * @param is_render Whether or not the surface is a render target. **/ std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { @@ -623,7 +667,7 @@ private: // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails - // inmediatly recycle the texture + // immediately recycle the texture for (const auto& surface : overlaps) { const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -691,6 +735,123 @@ private: MatchTopologyResult::FullMatch); } + /** + * Gets the starting address and parameters of a candidate surface and tries to find a + * matching surface within the cache that's similar to it. If there are many textures + * or the texture found if entirely incompatible, it will fail. If no texture is found, the + * blit will be unsuccessful. + * + * @param gpu_addr The starting address of the candidate surface. 
+ * @param params The parameters on the candidate surface. + **/ + Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { + const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + + if (!cache_addr) { + Deduction result{}; + result.type = DeductionType::DeductionFailed; + return result; + } + + if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { + TSurface& current_surface = iter->second; + const auto topological_result = current_surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { + Deduction result{}; + result.type = DeductionType::DeductionFailed; + return result; + } + const auto struct_result = current_surface->MatchesStructure(params); + if (struct_result != MatchStructureResult::None && + current_surface->MatchTarget(params.target)) { + Deduction result{}; + result.type = DeductionType::DeductionComplete; + result.surface = current_surface; + return result; + } + } + + const std::size_t candidate_size = params.GetGuestSizeInBytes(); + auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + + if (overlaps.empty()) { + Deduction result{}; + result.type = DeductionType::DeductionIncomplete; + return result; + } + + if (overlaps.size() > 1) { + Deduction result{}; + result.type = DeductionType::DeductionFailed; + return result; + } else { + Deduction result{}; + result.type = DeductionType::DeductionComplete; + result.surface = overlaps[0]; + return result; + } + } + + /** + * Gets the a source and destination starting address and parameters, + * and tries to deduce if they are supposed to be depth textures. If so, their + * parameters are modified and fixed into so. + * + * @param src_params The parameters of the candidate surface. + * @param dst_params The parameters of the destination surface. + * @param src_gpu_addr The starting address of the candidate surface. 
+ * @param dst_gpu_addr The starting address of the destination surface. + **/ + void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, + const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { + auto deduced_src = DeduceSurface(src_gpu_addr, src_params); + auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); + if (deduced_src.Failed() || deduced_dst.Failed()) { + return; + } + + const bool incomplete_src = deduced_src.Incomplete(); + const bool incomplete_dst = deduced_dst.Incomplete(); + + if (incomplete_src && incomplete_dst) { + return; + } + + const bool any_incomplete = incomplete_src || incomplete_dst; + + if (!any_incomplete) { + if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { + return; + } + } else { + if (incomplete_src && !(deduced_dst.IsDepth())) { + return; + } + + if (incomplete_dst && !(deduced_src.IsDepth())) { + return; + } + } + + const auto inherit_format = [](SurfaceParams& to, TSurface from) { + const SurfaceParams& params = from->GetSurfaceParams(); + to.pixel_format = params.pixel_format; + to.type = params.type; + }; + // Now we got the cases where one or both is Depth and the other is not known + if (!incomplete_src) { + inherit_format(src_params, deduced_src.surface); + } else { + inherit_format(src_params, deduced_dst.surface); + } + if (!incomplete_dst) { + inherit_format(dst_params, deduced_dst.surface); + } else { + inherit_format(dst_params, deduced_src.surface); + } + } + std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { auto new_surface{GetUncachedSurface(gpu_addr, params)}; @@ -793,6 +954,8 @@ private: VideoCore::RasterizerInterface& rasterizer; + FormatLookupTable format_lookup_table; + u64 ticks{}; // Guards the cache for protection conflicts. 
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index a9b8f69af..33bd31865 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -92,11 +92,11 @@ private: const unsigned int mask = 1 << m_NextBit++; // clear the bit - *m_CurByte &= ~mask; + *m_CurByte &= static_cast<unsigned char>(~mask); // Write the bit, if necessary if (b) - *m_CurByte |= mask; + *m_CurByte |= static_cast<unsigned char>(mask); // Next byte? if (m_NextBit >= 8) { @@ -137,7 +137,7 @@ public: } uint64_t mask = (1 << (end - start + 1)) - 1; - return (m_Bits >> start) & mask; + return (m_Bits >> start) & static_cast<IntType>(mask); } private: @@ -422,7 +422,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { TexelWeightParams params; // Read the entire block mode all at once - uint16_t modeBits = strm.ReadBits(11); + uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); // Does this match the void extent block mode? if ((modeBits & 0x01FF) == 0x1FC) { @@ -625,10 +625,10 @@ static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint } // Decode the RGBA components and renormalize them to the range [0, 255] - uint16_t r = strm.ReadBits(16); - uint16_t g = strm.ReadBits(16); - uint16_t b = strm.ReadBits(16); - uint16_t a = strm.ReadBits(16); + uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); + uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); + uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); + uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | (static_cast<uint32_t>(a) & 0xFF00) << 16; @@ -656,7 +656,7 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { return 0; if (toBit == 0) return 0; - IntType v = val & ((1 << numBits) - 1); + IntType v = val & static_cast<IntType>((1 << numBits) - 1); IntType res = v; uint32_t reslen = numBits; while (reslen < 
toBit) { @@ -666,8 +666,8 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { comp = numBits - newshift; numBits = newshift; } - res <<= numBits; - res |= v >> comp; + res = static_cast<IntType>(res << numBits); + res = static_cast<IntType>(res | (v >> comp)); reslen += numBits; } return res; @@ -681,9 +681,10 @@ protected: public: Pixel() = default; - Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8) + Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, - color{a, r, g, b} {} + color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), + static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} // Changes the depth of each pixel. This scales the values to // the appropriate bit depth by either truncating the least @@ -713,7 +714,7 @@ public: // Do nothing return val; } else if (oldDepth == 0 && newDepth != 0) { - return (1 << newDepth) - 1; + return static_cast<ChannelType>((1 << newDepth) - 1); } else if (newDepth > oldDepth) { return Replicate(val, oldDepth, newDepth); } else { @@ -721,10 +722,11 @@ public: if (newDepth == 0) { return 0xFF; } else { - uint8_t bitsWasted = oldDepth - newDepth; + uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); uint16_t v = static_cast<uint16_t>(val); - v = (v + (1 << (bitsWasted - 1))) >> bitsWasted; - v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1); + v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); + v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), + static_cast<uint16_t>((1 << newDepth) - 1)); return static_cast<uint8_t>(v); } } @@ -1190,18 +1192,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); - seed1 
*= seed1; - seed2 *= seed2; - seed3 *= seed3; - seed4 *= seed4; - seed5 *= seed5; - seed6 *= seed6; - seed7 *= seed7; - seed8 *= seed8; - seed9 *= seed9; - seed10 *= seed10; - seed11 *= seed11; - seed12 *= seed12; + seed1 = static_cast<uint8_t>(seed1 * seed1); + seed2 = static_cast<uint8_t>(seed2 * seed2); + seed3 = static_cast<uint8_t>(seed3 * seed3); + seed4 = static_cast<uint8_t>(seed4 * seed4); + seed5 = static_cast<uint8_t>(seed5 * seed5); + seed6 = static_cast<uint8_t>(seed6 * seed6); + seed7 = static_cast<uint8_t>(seed7 * seed7); + seed8 = static_cast<uint8_t>(seed8 * seed8); + seed9 = static_cast<uint8_t>(seed9 * seed9); + seed10 = static_cast<uint8_t>(seed10 * seed10); + seed11 = static_cast<uint8_t>(seed11 * seed11); + seed12 = static_cast<uint8_t>(seed12 * seed12); int32_t sh1, sh2, sh3; if (seed & 1) { @@ -1213,18 +1215,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, } sh3 = (seed & 0x10) ? sh1 : sh2; - seed1 >>= sh1; - seed2 >>= sh2; - seed3 >>= sh1; - seed4 >>= sh2; - seed5 >>= sh1; - seed6 >>= sh2; - seed7 >>= sh1; - seed8 >>= sh2; - seed9 >>= sh3; - seed10 >>= sh3; - seed11 >>= sh3; - seed12 >>= sh3; + seed1 = static_cast<uint8_t>(seed1 >> sh1); + seed2 = static_cast<uint8_t>(seed2 >> sh2); + seed3 = static_cast<uint8_t>(seed3 >> sh1); + seed4 = static_cast<uint8_t>(seed4 >> sh2); + seed5 = static_cast<uint8_t>(seed5 >> sh1); + seed6 = static_cast<uint8_t>(seed6 >> sh2); + seed7 = static_cast<uint8_t>(seed7 >> sh1); + seed8 = static_cast<uint8_t>(seed8 >> sh2); + seed9 = static_cast<uint8_t>(seed9 >> sh3); + seed10 = static_cast<uint8_t>(seed10 >> sh3); + seed11 = static_cast<uint8_t>(seed11 >> sh3); + seed12 = static_cast<uint8_t>(seed12 >> sh3); int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); @@ -1557,7 +1559,9 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, // Make sure that higher non-texel bits 
are set to zero const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; - texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1; + texelWeightData[clearByteStart - 1] = + texelWeightData[clearByteStart - 1] & + static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); std::vector<IntegerEncodedValue> texelWeightValues; diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index e36bc2c04..8e82c6748 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -132,6 +132,8 @@ enum class SwizzleSource : u32 { }; union TextureHandle { + TextureHandle(u32 raw) : raw{raw} {} + u32 raw; BitField<0, 20, u32> tic_id; BitField<20, 12, u32> tsc_id; @@ -340,13 +342,14 @@ struct TSCEntry { float GetLodBias() const { // Sign extend the 13-bit value. constexpr u32 mask = 1U << (13 - 1); - return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f; + return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; } std::array<float, 4> GetBorderColor() const { if (srgb_conversion) { - return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f, - srgb_border_color_b / 255.0f, border_color[3]}; + return {static_cast<float>(srgb_border_color_r) / 255.0f, + static_cast<float>(srgb_border_color_g) / 255.0f, + static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]}; } return border_color; } @@ -354,7 +357,6 @@ struct TSCEntry { static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); struct FullTextureInfo { - u32 index; TICEntry tic; TSCEntry tsc; }; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 60cda0ca3..8e947394c 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -28,7 +28,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { u16 GetResolutionScaleFactor(const 
RendererBase& renderer) { return static_cast<u16>( - Settings::values.resolution_factor + Settings::values.resolution_factor != 0 ? Settings::values.resolution_factor : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); } |
