aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt19
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h18
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h119
-rw-r--r--src/video_core/engines/fermi_2d.cpp30
-rw-r--r--src/video_core/engines/fermi_2d.h12
-rw-r--r--src/video_core/engines/kepler_compute.cpp30
-rw-r--r--src/video_core/engines/kepler_compute.h39
-rw-r--r--src/video_core/engines/kepler_memory.h4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp142
-rw-r--r--src/video_core/engines/maxwell_3d.h144
-rw-r--r--src/video_core/engines/maxwell_dma.h10
-rw-r--r--src/video_core/engines/shader_bytecode.h78
-rw-r--r--src/video_core/engines/shader_header.h50
-rw-r--r--src/video_core/gpu.cpp15
-rw-r--r--src/video_core/gpu.h14
-rw-r--r--src/video_core/gpu_asynch.cpp4
-rw-r--r--src/video_core/gpu_asynch.h1
-rw-r--r--src/video_core/gpu_synch.h1
-rw-r--r--src/video_core/gpu_thread.cpp19
-rw-r--r--src/video_core/gpu_thread.h9
-rw-r--r--src/video_core/macro_interpreter.cpp71
-rw-r--r--src/video_core/macro_interpreter.h80
-rw-r--r--src/video_core/morton.cpp24
-rw-r--r--src/video_core/rasterizer_accelerated.cpp63
-rw-r--r--src/video_core/rasterizer_accelerated.h31
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h20
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_device.h10
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp120
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp554
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h77
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp503
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp473
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h49
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp104
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h15
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp230
-rw-r--r--src/video_core/renderer_opengl/gl_state.h227
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp173
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp11
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp142
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp420
-rw-r--r--src/video_core/shader/ast.cpp753
-rw-r--r--src/video_core/shader/ast.h400
-rw-r--r--src/video_core/shader/compiler_settings.cpp26
-rw-r--r--src/video_core/shader/compiler_settings.h26
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp110
-rw-r--r--src/video_core/shader/const_buffer_locker.h80
-rw-r--r--src/video_core/shader/control_flow.cpp540
-rw-r--r--src/video_core/shader/control_flow.h77
-rw-r--r--src/video_core/shader/decode.cpp207
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp11
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp4
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp8
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp6
-rw-r--r--src/video_core/shader/decode/ffma.cpp4
-rw-r--r--src/video_core/shader/decode/half_set.cpp4
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp7
-rw-r--r--src/video_core/shader/decode/image.cpp50
-rw-r--r--src/video_core/shader/decode/memory.cpp39
-rw-r--r--src/video_core/shader/decode/other.cpp4
-rw-r--r--src/video_core/shader/decode/shift.cpp2
-rw-r--r--src/video_core/shader/decode/texture.cpp181
-rw-r--r--src/video_core/shader/decode/video.cpp2
-rw-r--r--src/video_core/shader/decode/warp.cpp74
-rw-r--r--src/video_core/shader/expr.cpp93
-rw-r--r--src/video_core/shader/expr.h156
-rw-r--r--src/video_core/shader/node.h115
-rw-r--r--src/video_core/shader/shader_ir.cpp131
-rw-r--r--src/video_core/shader/shader_ir.h76
-rw-r--r--src/video_core/surface.cpp307
-rw-r--r--src/video_core/surface.h248
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp208
-rw-r--r--src/video_core/texture_cache/format_lookup_table.h51
-rw-r--r--src/video_core/texture_cache/surface_base.cpp1
-rw-r--r--src/video_core/texture_cache/surface_base.h5
-rw-r--r--src/video_core/texture_cache/surface_params.cpp32
-rw-r--r--src/video_core/texture_cache/surface_params.h9
-rw-r--r--src/video_core/texture_cache/texture_cache.h281
-rw-r--r--src/video_core/textures/astc.cpp88
-rw-r--r--src/video_core/textures/texture.h10
-rw-r--r--src/video_core/video_core.cpp2
88 files changed, 5823 insertions, 2876 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e2f85c5f1..6f3f2aa9f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -6,6 +6,7 @@ add_library(video_core STATIC
dma_pusher.h
debug_utils/debug_utils.cpp
debug_utils/debug_utils.h
+ engines/const_buffer_engine_interface.h
engines/const_buffer_info.h
engines/engine_upload.cpp
engines/engine_upload.h
@@ -35,6 +36,8 @@ add_library(video_core STATIC
memory_manager.h
morton.cpp
morton.h
+ rasterizer_accelerated.cpp
+ rasterizer_accelerated.h
rasterizer_cache.cpp
rasterizer_cache.h
rasterizer_interface.h
@@ -105,9 +108,17 @@ add_library(video_core STATIC
shader/decode/warp.cpp
shader/decode/xmad.cpp
shader/decode/other.cpp
+ shader/ast.cpp
+ shader/ast.h
+ shader/compiler_settings.cpp
+ shader/compiler_settings.h
+ shader/const_buffer_locker.cpp
+ shader/const_buffer_locker.h
shader/control_flow.cpp
shader/control_flow.h
shader/decode.cpp
+ shader/expr.cpp
+ shader/expr.h
shader/node_helper.cpp
shader/node_helper.h
shader/node.h
@@ -116,6 +127,8 @@ add_library(video_core STATIC
shader/track.cpp
surface.cpp
surface.h
+ texture_cache/format_lookup_table.cpp
+ texture_cache/format_lookup_table.h
texture_cache/surface_base.cpp
texture_cache/surface_base.h
texture_cache/surface_params.cpp
@@ -169,3 +182,9 @@ target_link_libraries(video_core PRIVATE glad)
if (ENABLE_VULKAN)
target_link_libraries(video_core PRIVATE sirit)
endif()
+
+if (MSVC)
+ target_compile_options(video_core PRIVATE /we4267)
+else()
+ target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion)
+endif()
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2442ddfd6..4408b5001 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -12,6 +12,10 @@
#include <utility>
#include <vector>
+#include <boost/icl/interval_map.hpp>
+#include <boost/icl/interval_set.hpp>
+#include <boost/range/iterator_range.hpp>
+
#include "common/alignment.h"
#include "common/common_types.h"
#include "core/core.h"
@@ -30,7 +34,7 @@ public:
using BufferInfo = std::pair<const TBufferType*, u64>;
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
- bool is_written = false) {
+ bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
auto& memory_manager = system.GPU().MemoryManager();
@@ -43,9 +47,13 @@ public:
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
- if (size < max_stream_size) {
+ if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
- return StreamBufferUpload(host_ptr, size, alignment);
+ if (use_fast_cbuf) {
+ return ConstBufferUpload(host_ptr, size);
+ } else {
+ return StreamBufferUpload(host_ptr, size, alignment);
+ }
}
}
@@ -152,6 +160,10 @@ protected:
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) = 0;
+ virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
+ return {};
+ }
+
/// Register an object into the cache
void Register(const MapInterval& new_map, bool inherit_written = false) {
const CacheAddr cache_ptr = new_map->GetStart();
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
new file mode 100644
index 000000000..ac27b6cbe
--- /dev/null
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -0,0 +1,119 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra::Engines {
+
+enum class ShaderType : u32 {
+ Vertex = 0,
+ TesselationControl = 1,
+ TesselationEval = 2,
+ Geometry = 3,
+ Fragment = 4,
+ Compute = 5,
+};
+
+struct SamplerDescriptor {
+ union {
+ BitField<0, 20, Tegra::Shader::TextureType> texture_type;
+ BitField<20, 1, u32> is_array;
+ BitField<21, 1, u32> is_buffer;
+ BitField<22, 1, u32> is_shadow;
+ u32 raw{};
+ };
+
+ bool operator==(const SamplerDescriptor& rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+
+ bool operator!=(const SamplerDescriptor& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+
+ static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
+ SamplerDescriptor result;
+ switch (tic_texture_type) {
+ case Tegra::Texture::TextureType::Texture1D:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture2D:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture3D:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::TextureCubemap:
+ result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture1DArray:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
+ result.is_array.Assign(1);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture2DArray:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.is_array.Assign(1);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture1DBuffer:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(1);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::Texture2DNoMipmap:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ case Tegra::Texture::TextureType::TextureCubeArray:
+ result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
+ result.is_array.Assign(1);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ default:
+ result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+ result.is_array.Assign(0);
+ result.is_buffer.Assign(0);
+ result.is_shadow.Assign(0);
+ return result;
+ }
+ }
+};
+static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
+
+class ConstBufferEngineInterface {
+public:
+ virtual ~ConstBufferEngineInterface() = default;
+ virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
+ virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
+ virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const = 0;
+ virtual u32 GetBoundBuffer() const = 0;
+};
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 7ff44f06d..85d308e26 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -28,6 +28,13 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
}
}
+std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
+ const u32 line_a = src_2 - src_1;
+ const u32 line_b = dst_2 - dst_1;
+ const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
+ return {line_b - (excess * line_b) / line_a, excess};
+}
+
void Fermi2D::HandleSurfaceCopy() {
LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
static_cast<u32>(regs.operation));
@@ -47,10 +54,27 @@ void Fermi2D::HandleSurfaceCopy() {
src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
}
+ u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
+ u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
+ const auto [new_dst_w, src_excess_x] =
+ DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
+ const auto [new_dst_h, src_excess_y] =
+ DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
+ dst_blit_x2 = new_dst_w + regs.blit_dst_x;
+ src_blit_x2 = src_blit_x2 - src_excess_x;
+ dst_blit_y2 = new_dst_h + regs.blit_dst_y;
+ src_blit_y2 = src_blit_y2 - src_excess_y;
+ const auto [new_src_w, dst_excess_x] =
+ DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
+ const auto [new_src_h, dst_excess_y] =
+ DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
+ src_blit_x2 = new_src_w + src_blit_x1;
+ dst_blit_x2 = dst_blit_x2 - dst_excess_x;
+ src_blit_y2 = new_src_h + src_blit_y1;
+ dst_blit_y2 = dst_blit_y2 - dst_excess_y;
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
- const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
- regs.blit_dst_x + regs.blit_dst_width,
- regs.blit_dst_y + regs.blit_dst_height};
+ const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
+ dst_blit_y2};
Config copy_config;
copy_config.operation = regs.operation;
copy_config.filter = regs.blit_control.filter;
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0901cf2fa..dba342c70 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -99,19 +99,19 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x80);
+ INSERT_UNION_PADDING_WORDS(0x80);
Surface dst;
- INSERT_PADDING_WORDS(2);
+ INSERT_UNION_PADDING_WORDS(2);
Surface src;
- INSERT_PADDING_WORDS(0x15);
+ INSERT_UNION_PADDING_WORDS(0x15);
Operation operation;
- INSERT_PADDING_WORDS(0x177);
+ INSERT_UNION_PADDING_WORDS(0x177);
union {
u32 raw;
@@ -119,7 +119,7 @@ public:
BitField<4, 1, Filter> filter;
} blit_control;
- INSERT_PADDING_WORDS(0x8);
+ INSERT_UNION_PADDING_WORDS(0x8);
u32 blit_dst_x;
u32 blit_dst_y;
@@ -130,7 +130,7 @@ public:
u64 blit_src_x;
u64 blit_src_y;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 63d449135..3a39aeabe 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -50,7 +50,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
}
}
-Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
+Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
ASSERT(cbuf_mask[regs.tex_cb_index]);
@@ -61,22 +61,38 @@ Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) co
ASSERT(address < texinfo.Address() + texinfo.size);
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
- return GetTextureInfo(tex_handle, offset);
+ return GetTextureInfo(tex_handle);
}
-Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle,
- std::size_t offset) const {
- return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id),
- GetTSCEntry(tex_handle.tsc_id)};
+Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
+ return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
}
-u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const {
+u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
+ ASSERT(stage == ShaderType::Compute);
const auto& buffer = launch_description.const_buffer_config[const_buffer];
u32 result;
std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
return result;
}
+SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
+ return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
+}
+
+SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const {
+ ASSERT(stage == ShaderType::Compute);
+ const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
+ const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
+
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+ const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
+ SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+ result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ return result;
+}
+
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 90cf650d2..5259d92bd 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
#include "video_core/textures/texture.h"
@@ -37,7 +38,7 @@ namespace Tegra::Engines {
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
-class KeplerCompute final {
+class KeplerCompute final : public ConstBufferEngineInterface {
public:
explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
@@ -50,7 +51,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
Upload::Registers upload;
@@ -62,7 +63,7 @@ public:
u32 data_upload;
- INSERT_PADDING_WORDS(0x3F);
+ INSERT_UNION_PADDING_WORDS(0x3F);
struct {
u32 address;
@@ -71,11 +72,11 @@ public:
}
} launch_desc_loc;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 launch;
- INSERT_PADDING_WORDS(0x4A7);
+ INSERT_UNION_PADDING_WORDS(0x4A7);
struct {
u32 address_high;
@@ -87,7 +88,7 @@ public:
}
} tsc;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
struct {
u32 address_high;
@@ -99,7 +100,7 @@ public:
}
} tic;
- INSERT_PADDING_WORDS(0x22);
+ INSERT_UNION_PADDING_WORDS(0x22);
struct {
u32 address_high;
@@ -110,11 +111,11 @@ public:
}
} code_loc;
- INSERT_PADDING_WORDS(0x3FE);
+ INSERT_UNION_PADDING_WORDS(0x3FE);
u32 tex_cb_index;
- INSERT_PADDING_WORDS(0x374);
+ INSERT_UNION_PADDING_WORDS(0x374);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -178,7 +179,7 @@ public:
};
INSERT_PADDING_WORDS(0x11);
- } launch_description;
+ } launch_description{};
struct {
u32 write_offset = 0;
@@ -195,13 +196,21 @@ public:
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
- Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const;
+ Texture::FullTextureInfo GetTexture(std::size_t offset) const;
- /// Given a Texture Handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
- std::size_t offset) const;
+ /// Given a texture handle, returns the TSC and TIC entries.
+ Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
- u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const;
+ u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
+
+ SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
+
+ SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const override;
+
+ u32 GetBoundBuffer() const override {
+ return regs.tex_cb_index;
+ }
private:
Core::System& system;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index e0e25c321..396fb6e86 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -45,7 +45,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
Upload::Registers upload;
@@ -57,7 +57,7 @@ public:
u32 data;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b318aedb8..a44c09003 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -98,10 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
-#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
+#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
void Maxwell3D::InitDirtySettings() {
- const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
+ const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
const auto start_itr = dirty_pointers.begin() + start;
const auto end_itr = start_itr + range;
std::fill(start_itr, end_itr, position);
@@ -112,10 +112,10 @@ void Maxwell3D::InitDirtySettings() {
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
- u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
+ u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
set_block(rt_reg, registers_per_rt, rt_dirty_reg);
- rt_dirty_reg++;
+ ++rt_dirty_reg;
}
constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
@@ -129,35 +129,35 @@ void Maxwell3D::InitDirtySettings() {
constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
- u32 va_reg = DIRTY_REGS_POS(vertex_array);
- u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
+ u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
+ u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
vertex_reg += vertex_array_size) {
- set_block(vertex_reg, 3, va_reg);
+ set_block(vertex_reg, 3, va_dirty_reg);
// The divisor concerns vertex array instances
- dirty_pointers[vertex_reg + 3] = vi_reg;
- va_reg++;
- vi_reg++;
+ dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
+ ++va_dirty_reg;
+ ++vi_dirty_reg;
}
constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
- va_reg = DIRTY_REGS_POS(vertex_array);
+ va_dirty_reg = DIRTY_REGS_POS(vertex_array);
for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
vertex_reg += vertex_limit_size) {
- set_block(vertex_reg, vertex_limit_size, va_reg);
- va_reg++;
+ set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
+ va_dirty_reg++;
}
constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
constexpr u32 vertex_instance_size =
sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
constexpr u32 vertex_instance_end =
vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
- vi_reg = DIRTY_REGS_POS(vertex_instance);
+ vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
vertex_reg += vertex_instance_size) {
- set_block(vertex_reg, vertex_instance_size, vi_reg);
- vi_reg++;
+ set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
+ vi_dirty_reg++;
}
set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
DIRTY_REGS_POS(vertex_attrib_format));
@@ -171,7 +171,7 @@ void Maxwell3D::InitDirtySettings() {
// State
// Viewport
- constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
+ constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
set_block(viewport_start, viewport_size, viewport_dirty_reg);
@@ -198,7 +198,7 @@ void Maxwell3D::InitDirtySettings() {
set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
// Depth Test
- constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
+ constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
@@ -223,12 +223,12 @@ void Maxwell3D::InitDirtySettings() {
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
// Color Mask
- constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
+ constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
color_mask_dirty_reg);
// Blend State
- constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
+ constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
blend_state_dirty_reg);
dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
@@ -237,18 +237,23 @@ void Maxwell3D::InitDirtySettings() {
blend_state_dirty_reg);
// Scissor State
- constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
+ constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
scissor_test_dirty_reg);
// Polygon Offset
- constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
+ constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
+
+ // Depth bounds
+ constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
+ dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
}
void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
@@ -256,7 +261,8 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
executing_macro = 0;
// Lookup the macro offset
- const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
+ const u32 entry =
+ ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
// Execute the current macro.
macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
@@ -473,7 +479,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
}
void Maxwell3D::FlushMMEInlineDraw() {
- LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
@@ -736,14 +742,6 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
- [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()};
- [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()};
- [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()};
- [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()};
-
- // TODO(Subv): Different data types for separate components are not supported
- DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
-
return tic_entry;
}
@@ -755,61 +753,8 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
return tsc_entry;
}
-std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
- std::vector<Texture::FullTextureInfo> textures;
-
- auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
- auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
- ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
-
- GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
-
- // Offset into the texture constbuffer where the texture info begins.
- static constexpr std::size_t TextureInfoOffset = 0x20;
-
- for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
- current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
-
- Texture::FullTextureInfo tex_info{};
- // TODO(Subv): Use the shader to determine which textures are actually accessed.
- tex_info.index =
- static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
- sizeof(Texture::TextureHandle);
-
- // Load the TIC data.
- auto tic_entry = GetTICEntry(tex_handle.tic_id);
- // TODO(Subv): Workaround for BitField's move constructor being deleted.
- std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-
- // Load the TSC data
- auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
- // TODO(Subv): Workaround for BitField's move constructor being deleted.
- std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-
- textures.push_back(tex_info);
- }
-
- return textures;
-}
-
-Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle,
- std::size_t offset) const {
- Texture::FullTextureInfo tex_info{};
- tex_info.index = static_cast<u32>(offset);
-
- // Load the TIC data.
- auto tic_entry = GetTICEntry(tex_handle.tic_id);
- // TODO(Subv): Workaround for BitField's move constructor being deleted.
- std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-
- // Load the TSC data
- auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
- // TODO(Subv): Workaround for BitField's move constructor being deleted.
- std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-
- return tex_info;
+Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { // Pairs the TIC and TSC entries named by the handle
+ return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; // TIC entry first, then TSC entry
}
Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
@@ -825,7 +770,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
- return GetTextureInfo(tex_handle, offset);
+ return GetTextureInfo(tex_handle);
}
u32 Maxwell3D::GetRegisterValue(u32 method) const {
@@ -841,7 +786,8 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer.Clear();
}
-u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const {
+u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
+ ASSERT(stage != ShaderType::Compute);
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
u32 result;
@@ -849,4 +795,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6
return result;
}
+SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { // `offset` counts texture handles, not bytes
+ return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); // scaled to a byte offset into const buffer regs.tex_cb_index
+}
+
+SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const { // Builds a sampler descriptor from the texture handle at byte `offset` of const buffer `const_buffer` in `stage`
+ ASSERT(stage != ShaderType::Compute); // Compute is not accepted by this lookup
+ const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+ const auto& tex_info_buffer = shader.const_buffers[const_buffer];
+ const GPUVAddr tex_info_address = tex_info_buffer.address + offset; // `offset` is added to the buffer's base address as-is
+
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; // the handle is a single 32-bit word read through memory_manager
+ const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); // resolves the handle to its TIC and TSC entries
+ SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+ result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); // is_shadow mirrors the TSC depth_compare_enabled field
+ return result;
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4c97759ed..1aa7c274f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -15,6 +15,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
@@ -44,7 +45,7 @@ namespace Tegra::Engines {
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
-class Maxwell3D final {
+class Maxwell3D final : public ConstBufferEngineInterface {
public:
explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
@@ -495,7 +496,7 @@ public:
Equation equation_a;
Factor factor_source_a;
Factor factor_dest_a;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
};
struct RenderTargetConfig {
@@ -516,7 +517,7 @@ public:
};
u32 layer_stride;
u32 base_layer;
- INSERT_PADDING_WORDS(7);
+ INSERT_UNION_PADDING_WORDS(7);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
@@ -541,7 +542,7 @@ public:
f32 translate_x;
f32 translate_y;
f32 translate_z;
- INSERT_PADDING_WORDS(2);
+ INSERT_UNION_PADDING_WORDS(2);
Common::Rectangle<s32> GetRect() const {
return {
@@ -605,7 +606,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x45);
+ INSERT_UNION_PADDING_WORDS(0x45);
struct {
u32 upload_address;
@@ -614,7 +615,7 @@ public:
u32 bind;
} macros;
- INSERT_PADDING_WORDS(0x17);
+ INSERT_UNION_PADDING_WORDS(0x17);
Upload::Registers upload;
struct {
@@ -625,7 +626,7 @@ public:
u32 data_upload;
- INSERT_PADDING_WORDS(0x44);
+ INSERT_UNION_PADDING_WORDS(0x44);
struct {
union {
@@ -635,11 +636,11 @@ public:
};
} sync_info;
- INSERT_PADDING_WORDS(0x11E);
+ INSERT_UNION_PADDING_WORDS(0x11E);
u32 tfb_enabled;
- INSERT_PADDING_WORDS(0x2E);
+ INSERT_UNION_PADDING_WORDS(0x2E);
std::array<RenderTargetConfig, NumRenderTargets> rt;
@@ -647,47 +648,49 @@ public:
std::array<ViewPort, NumViewports> viewports;
- INSERT_PADDING_WORDS(0x1D);
+ INSERT_UNION_PADDING_WORDS(0x1D);
struct {
u32 first;
u32 count;
} vertex_buffer;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
float clear_color[4];
float clear_depth;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
s32 clear_stencil;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
u32 polygon_offset_point_enable;
u32 polygon_offset_line_enable;
u32 polygon_offset_fill_enable;
- INSERT_PADDING_WORDS(0xD);
+ INSERT_UNION_PADDING_WORDS(0xD);
std::array<ScissorTest, NumViewports> scissor_test;
- INSERT_PADDING_WORDS(0x15);
+ INSERT_UNION_PADDING_WORDS(0x15);
s32 stencil_back_func_ref;
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0xC);
u32 color_mask_common;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
u32 rt_separate_frag_data;
- INSERT_PADDING_WORDS(0xC);
+ f32 depth_bounds[2];
+
+ INSERT_UNION_PADDING_WORDS(0xA);
struct {
u32 address_high;
@@ -707,7 +710,7 @@ public:
}
} zeta;
- INSERT_PADDING_WORDS(0x41);
+ INSERT_UNION_PADDING_WORDS(0x41);
union {
BitField<0, 4, u32> stencil;
@@ -716,11 +719,11 @@ public:
BitField<12, 4, u32> viewport;
} clear_flags;
- INSERT_PADDING_WORDS(0x19);
+ INSERT_UNION_PADDING_WORDS(0x19);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
- INSERT_PADDING_WORDS(0xF);
+ INSERT_UNION_PADDING_WORDS(0xF);
struct {
union {
@@ -743,16 +746,16 @@ public:
}
} rt_control;
- INSERT_PADDING_WORDS(0x2);
+ INSERT_UNION_PADDING_WORDS(0x2);
u32 zeta_width;
u32 zeta_height;
- INSERT_PADDING_WORDS(0x27);
+ INSERT_UNION_PADDING_WORDS(0x27);
u32 depth_test_enable;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x5);
u32 independent_blend_enable;
@@ -760,7 +763,7 @@ public:
u32 alpha_test_enabled;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
u32 d3d_cull_mode;
@@ -774,7 +777,7 @@ public:
float b;
float a;
} blend_color;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
struct {
u32 separate_alpha;
@@ -783,7 +786,7 @@ public:
Blend::Factor factor_dest_rgb;
Blend::Equation equation_a;
Blend::Factor factor_source_a;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
Blend::Factor factor_dest_a;
u32 enable_common;
@@ -799,7 +802,7 @@ public:
u32 stencil_front_func_mask;
u32 stencil_front_mask;
- INSERT_PADDING_WORDS(0x2);
+ INSERT_UNION_PADDING_WORDS(0x2);
u32 frag_color_clamp;
@@ -808,12 +811,12 @@ public:
BitField<4, 1, u32> triangle_rast_flip;
} screen_y_control;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
u32 vb_element_base;
u32 vb_base_instance;
- INSERT_PADDING_WORDS(0x35);
+ INSERT_UNION_PADDING_WORDS(0x35);
union {
BitField<0, 1, u32> c0;
@@ -826,11 +829,11 @@ public:
BitField<7, 1, u32> c7;
} clip_distance_enabled;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
float point_size;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
u32 zeta_enable;
@@ -839,7 +842,7 @@ public:
BitField<4, 1, u32> alpha_to_one;
} multisample_control;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
struct {
u32 address_high;
@@ -863,11 +866,11 @@ public:
}
} tsc;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
float polygon_offset_factor;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
struct {
u32 tic_address_high;
@@ -880,7 +883,7 @@ public:
}
} tic;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x5);
u32 stencil_two_side_enable;
StencilOp stencil_back_op_fail;
@@ -888,13 +891,13 @@ public:
StencilOp stencil_back_op_zpass;
ComparisonOp stencil_back_func_func;
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
u32 framebuffer_srgb;
float polygon_offset_units;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
union {
BitField<2, 1, u32> coord_origin;
@@ -910,7 +913,7 @@ public:
(static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
}
} code_address;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
struct {
u32 vertex_end_gl;
@@ -922,14 +925,14 @@ public:
};
} draw;
- INSERT_PADDING_WORDS(0xA);
+ INSERT_UNION_PADDING_WORDS(0xA);
struct {
u32 enabled;
u32 index;
} primitive_restart;
- INSERT_PADDING_WORDS(0x5F);
+ INSERT_UNION_PADDING_WORDS(0x5F);
struct {
u32 start_addr_high;
@@ -970,9 +973,9 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS(0x7);
+ INSERT_UNION_PADDING_WORDS(0x7);
- INSERT_PADDING_WORDS(0x1F);
+ INSERT_UNION_PADDING_WORDS(0x1F);
float polygon_offset_clamp;
@@ -986,17 +989,17 @@ public:
}
} instanced_arrays;
- INSERT_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x6);
Cull cull;
u32 pixel_center_integer;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 viewport_transform_enabled;
- INSERT_PADDING_WORDS(0x3);
+ INSERT_UNION_PADDING_WORDS(0x3);
union {
BitField<0, 1, u32> depth_range_0_1;
@@ -1004,13 +1007,13 @@ public:
BitField<4, 1, u32> depth_clamp_far;
} view_volume_clip_control;
- INSERT_PADDING_WORDS(0x21);
+ INSERT_UNION_PADDING_WORDS(0x21);
struct {
u32 enable;
LogicOperation operation;
} logic_op;
- INSERT_PADDING_WORDS(0x1);
+ INSERT_UNION_PADDING_WORDS(0x1);
union {
u32 raw;
@@ -1023,9 +1026,9 @@ public:
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
- INSERT_PADDING_WORDS(0xB);
+ INSERT_UNION_PADDING_WORDS(0xB);
std::array<ColorMask, NumRenderTargets> color_mask;
- INSERT_PADDING_WORDS(0x38);
+ INSERT_UNION_PADDING_WORDS(0x38);
struct {
u32 query_address_high;
@@ -1047,7 +1050,7 @@ public:
}
} query;
- INSERT_PADDING_WORDS(0x3C);
+ INSERT_UNION_PADDING_WORDS(0x3C);
struct {
union {
@@ -1087,10 +1090,10 @@ public:
BitField<4, 4, ShaderProgram> program;
};
u32 offset;
- INSERT_PADDING_WORDS(14);
+ INSERT_UNION_PADDING_WORDS(14);
} shader_config[MaxShaderProgram];
- INSERT_PADDING_WORDS(0x60);
+ INSERT_UNION_PADDING_WORDS(0x60);
u32 firmware[0x20];
@@ -1107,7 +1110,7 @@ public:
}
} const_buffer;
- INSERT_PADDING_WORDS(0x10);
+ INSERT_UNION_PADDING_WORDS(0x10);
struct {
union {
@@ -1115,14 +1118,14 @@ public:
BitField<0, 1, u32> valid;
BitField<4, 5, u32> index;
};
- INSERT_PADDING_WORDS(7);
+ INSERT_UNION_PADDING_WORDS(7);
} cb_bind[MaxShaderStage];
- INSERT_PADDING_WORDS(0x56);
+ INSERT_UNION_PADDING_WORDS(0x56);
u32 tex_cb_index;
- INSERT_PADDING_WORDS(0x395);
+ INSERT_UNION_PADDING_WORDS(0x395);
struct {
/// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1134,14 +1137,14 @@ public:
}
} ssbo_info;
- INSERT_PADDING_WORDS(0x11);
+ INSERT_UNION_PADDING_WORDS(0x11);
struct {
u32 address[MaxShaderStage];
u32 size[MaxShaderStage];
} tex_info_buffers;
- INSERT_PADDING_WORDS(0xCC);
+ INSERT_UNION_PADDING_WORDS(0xCC);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -1163,6 +1166,8 @@ public:
struct DirtyRegs {
static constexpr std::size_t NUM_REGS = 256;
+ static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
+
union {
struct {
bool null_dirty;
@@ -1201,6 +1206,7 @@ public:
bool transform_feedback;
bool color_mask;
bool polygon_offset;
+ bool depth_bounds_values;
// Complementary
bool viewport_transform;
@@ -1244,17 +1250,22 @@ public:
void FlushMMEInlineDraw();
- /// Given a Texture Handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
- std::size_t offset) const;
-
- /// Returns a list of enabled textures for the specified shader stage.
- std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
+ /// Given a texture handle, returns the TSC and TIC entries.
+ Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
- u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const;
+ u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
+
+ SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
+
+ SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+ u64 offset) const override;
+
+ u32 GetBoundBuffer() const override { // Returns the tex_cb_index register, the same const buffer index AccessBoundSampler reads handles from
+ return regs.tex_cb_index;
+ }
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.
@@ -1400,6 +1411,7 @@ ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
+ASSERT_REG_POSITION(depth_bounds, 0x3EC);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 93808a9bb..4f40d1d1f 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -94,7 +94,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0xC0);
+ INSERT_UNION_PADDING_WORDS(0xC0);
struct {
union {
@@ -112,7 +112,7 @@ public:
};
} exec;
- INSERT_PADDING_WORDS(0x3F);
+ INSERT_UNION_PADDING_WORDS(0x3F);
struct {
u32 address_high;
@@ -139,7 +139,7 @@ public:
u32 x_count;
u32 y_count;
- INSERT_PADDING_WORDS(0xB8);
+ INSERT_UNION_PADDING_WORDS(0xB8);
u32 const0;
u32 const1;
@@ -162,11 +162,11 @@ public:
Parameters dst_params;
- INSERT_PADDING_WORDS(1);
+ INSERT_UNION_PADDING_WORDS(1);
Parameters src_params;
- INSERT_PADDING_WORDS(0x13);
+ INSERT_UNION_PADDING_WORDS(0x13);
};
std::array<u32, NUM_REGS> reg_array;
};
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7a6355ce2..9fafed4a2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 {
};
union Instruction {
- Instruction& operator=(const Instruction& instr) {
+ constexpr Instruction& operator=(const Instruction& instr) {
value = instr.value;
return *this;
}
@@ -616,6 +616,14 @@ union Instruction {
} shfl;
union {
+ BitField<44, 1, u64> ftz;
+ BitField<39, 2, u64> tab5cb8_2;
+ BitField<38, 1, u64> ndv;
+ BitField<47, 1, u64> cc;
+ BitField<28, 8, u64> swizzle;
+ } fswzadd; // Bit-field layout for the FSWZADD instruction
+
+ union {
BitField<8, 8, Register> gpr;
BitField<20, 24, s64> offset;
} gmem;
@@ -1238,6 +1246,32 @@ union Instruction {
} tld4;
union {
+ BitField<35, 1, u64> ndv_flag;
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<33, 2, u64> info; // 2-bit selector: 1 means AOFFI, 2 means PTP (see UsesMiscMode)
+ BitField<37, 2, u64> component;
+
+ bool UsesMiscMode(TextureMiscMode mode) const { // True when `mode` is enabled in this encoding
+ switch (mode) {
+ case TextureMiscMode::NDV:
+ return ndv_flag != 0;
+ case TextureMiscMode::NODEP:
+ return nodep_flag != 0;
+ case TextureMiscMode::DC:
+ return dc_flag != 0;
+ case TextureMiscMode::AOFFI:
+ return info == 1;
+ case TextureMiscMode::PTP:
+ return info == 2;
+ default: // any other mode is never reported as used
+ break;
+ }
+ return false;
+ }
+ } tld4_b; // Bit-field layout for TLD4_B (bindless texture gather)
+
+ union {
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
BitField<51, 1, u64> aoffi_flag;
@@ -1452,7 +1486,8 @@ union Instruction {
u32 value = static_cast<u32>(target);
// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
+ return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
+ 1;
}
} bra;
@@ -1466,7 +1501,8 @@ union Instruction {
u32 value = static_cast<u32>(target);
// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
+ return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
+ 1;
}
} brx;
@@ -1564,6 +1600,7 @@ public:
DEPBAR,
VOTE,
SHFL,
+ FSWZADD,
BFE_C,
BFE_R,
BFE_IMM,
@@ -1590,7 +1627,8 @@ public:
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLD, // Texture Load
TLDS, // Texture Load with scalar/non-vec4 source/destinations
- TLD4, // Texture Load 4
+ TLD4, // Texture Gather 4
+ TLD4_B, // Texture Gather 4 Bindless
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
@@ -1760,22 +1798,22 @@ public:
class Matcher {
public:
- Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type)
+ constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
: name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
- const char* GetName() const {
+ constexpr const char* GetName() const {
return name;
}
- u16 GetMask() const {
+ constexpr u16 GetMask() const {
return mask;
}
- Id GetId() const {
+ constexpr Id GetId() const {
return id;
}
- Type GetType() const {
+ constexpr Type GetType() const {
return type;
}
@@ -1784,7 +1822,7 @@ public:
* @param instruction The instruction to test
* @returns true if the given instruction matches.
*/
- bool Matches(u16 instruction) const {
+ constexpr bool Matches(u16 instruction) const {
return (instruction & mask) == expected;
}
@@ -1818,32 +1856,32 @@ private:
* A '0' in a bitstring indicates that a zero must be present at that bit position.
* A '1' in a bitstring indicates that a one must be present at that bit position.
*/
- static auto GetMaskAndExpect(const char* const bitstring) {
+ static constexpr auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
for (std::size_t i = 0; i < opcode_bitsize; i++) {
const std::size_t bit_position = opcode_bitsize - i - 1;
switch (bitstring[i]) {
case '0':
- mask |= 1 << bit_position;
+ mask |= static_cast<u16>(1U << bit_position);
break;
case '1':
- expect |= 1 << bit_position;
- mask |= 1 << bit_position;
+ expect |= static_cast<u16>(1U << bit_position);
+ mask |= static_cast<u16>(1U << bit_position);
break;
default:
// Ignore
break;
}
}
- return std::make_tuple(mask, expect);
+ return std::make_pair(mask, expect);
}
public:
/// Creates a matcher that can match and parse instructions based on bitstring.
- static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type,
- const char* const name) {
- const auto mask_expect = GetMaskAndExpect(bitstring);
- return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type);
+ static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
+ const char* const name) {
+ const auto [mask, expected] = GetMaskAndExpect(bitstring);
+ return Matcher(name, mask, expected, op, type);
}
};
@@ -1861,6 +1899,7 @@ private:
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
+ INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
@@ -1881,6 +1920,7 @@ private:
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
+ INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index e86a7f04a..bc80661d8 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -38,37 +38,37 @@ struct Header {
BitField<26, 1, u32> does_load_or_store;
BitField<27, 1, u32> does_fp64;
BitField<28, 4, u32> stream_out_mask;
- } common0;
+ } common0{};
union {
BitField<0, 24, u32> shader_local_memory_low_size;
BitField<24, 8, u32> per_patch_attribute_count;
- } common1;
+ } common1{};
union {
BitField<0, 24, u32> shader_local_memory_high_size;
BitField<24, 8, u32> threads_per_input_primitive;
- } common2;
+ } common2{};
union {
BitField<0, 24, u32> shader_local_memory_crs_size;
BitField<24, 4, OutputTopology> output_topology;
BitField<28, 4, u32> reserved;
- } common3;
+ } common3{};
union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<24, 4, u32> reserved;
BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
- } common4;
+ } common4{};
union {
struct {
- INSERT_PADDING_BYTES(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES(1); // ImapSystemValuesB
- INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
- INSERT_PADDING_BYTES(2); // ImapColor
+ INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32]
+ INSERT_UNION_PADDING_BYTES(2); // ImapColor
union {
BitField<0, 8, u16> clip_distances;
BitField<8, 1, u16> point_sprite_s;
@@ -79,20 +79,20 @@ struct Header {
BitField<14, 1, u16> instance_id;
BitField<15, 1, u16> vertex_id;
};
- INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES(1); // ImapReserved
- INSERT_PADDING_BYTES(3); // OmapSystemValuesA
- INSERT_PADDING_BYTES(1); // OmapSystemValuesB
- INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
- INSERT_PADDING_BYTES(2); // OmapColor
- INSERT_PADDING_BYTES(2); // OmapSystemValuesC
- INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10]
- INSERT_PADDING_BYTES(1); // OmapReserved
+ INSERT_UNION_PADDING_BYTES(5); // ImapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(1); // ImapReserved
+ INSERT_UNION_PADDING_BYTES(3); // OmapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // OmapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32]
+ INSERT_UNION_PADDING_BYTES(2); // OmapColor
+ INSERT_UNION_PADDING_BYTES(2); // OmapSystemValuesC
+ INSERT_UNION_PADDING_BYTES(5); // OmapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(1); // OmapReserved
} vtg;
struct {
- INSERT_PADDING_BYTES(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+ INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
+ INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
@@ -100,10 +100,10 @@ struct Header {
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
- INSERT_PADDING_BYTES(2); // ImapColor
- INSERT_PADDING_BYTES(2); // ImapSystemValuesC
- INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES(2); // ImapReserved
+ INSERT_UNION_PADDING_BYTES(2); // ImapColor
+ INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC
+ INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+ INSERT_UNION_PADDING_BYTES(2); // ImapReserved
struct {
u32 target;
union {
@@ -139,6 +139,8 @@ struct Header {
return result;
}
} ps;
+
+ std::array<u32, 0xF> raw{};
};
u64 GetLocalMemorySize() const {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 76cfe8107..095660115 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
+#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/memory.h"
@@ -17,6 +18,8 @@
namespace Tegra {
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
@@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
+void GPU::WaitFence(u32 syncpoint_id, u32 value) const { // Returns once syncpoint `syncpoint_id` is at least `value`
+ // A synchronous GPU is always in sync, so there is nothing to wait for.
+ if (!is_async) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_wait); // the wait below runs inside the "Wait for the GPU" profiling scope
+ while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { // busy-wait: the loop body is empty, no sleep or yield. NOTE(review): a relaxed load orders no other memory; confirm callers need no acquire semantics
+ }
+}
+
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex};
@@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
- block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
+ block.timestamp = system.CoreTiming().GetTicks();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 29fa8e95b..ecc338ae9 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -177,6 +177,12 @@ public:
/// Returns a reference to the GPU DMA pusher.
Tegra::DmaPusher& DmaPusher();
+ /// Waits for the GPU to finish working.
+ virtual void WaitIdle() const = 0;
+
+ /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame; returns once syncpoint `syncpoint_id` is at least `value`.
+ void WaitFence(u32 syncpoint_id, u32 value) const;
+
void IncrementSyncPoint(u32 syncpoint_id);
u32 GetSyncpointValue(u32 syncpoint_id) const;
@@ -201,7 +207,7 @@ public:
union {
struct {
- INSERT_PADDING_WORDS(0x4);
+ INSERT_UNION_PADDING_WORDS(0x4);
struct {
u32 address_high;
u32 address_low;
@@ -214,12 +220,12 @@ public:
u32 semaphore_sequence;
u32 semaphore_trigger;
- INSERT_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0xC);
// The puser and the puller share the reference counter, the pusher only has read
// access
u32 reference_count;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x5);
u32 semaphore_acquire;
u32 semaphore_release;
@@ -228,7 +234,7 @@ public:
BitField<4, 4, u32> operation;
BitField<8, 8, u32> id;
} fence_action;
- INSERT_PADDING_WORDS(0xE2);
+ INSERT_UNION_PADDING_WORDS(0xE2);
// Puller state
u32 acquire_mode;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index f2a3a390e..04222d060 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
+void GPUAsynch::WaitIdle() const { // Delegates the wait to gpu_thread
+ gpu_thread.WaitIdle();
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index a12f9bac4..1241ade1d 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,7 @@ public:
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+ void WaitIdle() const override;
protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 5eb1c461c..c71baee89 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,7 @@ public:
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+ void WaitIdle() const override {} // Intentionally empty; presumably the synchronous GPU never has outstanding work to wait on (TODO confirm)
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 5f039e4fd..758a37f14 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,8 +5,6 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
-#include "core/core_timing.h"
-#include "core/core_timing_util.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
@@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
- synchronization_event = system.CoreTiming().RegisterEvent(
- "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
- const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
- const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
- system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
+ PushCommand(SubmitListCommand(std::move(entries)));
}
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
+void ThreadManager::WaitIdle() const { // Spins until signaled_fence has caught up with last_fence, the fence of the most recently pushed command
+ while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { // busy-wait with an empty body. NOTE(review): last_fence is a plain u64 incremented in PushCommand; confirm both run on the same thread
+ }
+}
+
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
return fence;
}
-MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-void SynchState::WaitForSynchronization(u64 fence) {
- while (signaled_fence.load() < fence)
- ;
-}
-
} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 3ae0ec9f3..08dc96bb3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -21,9 +21,6 @@ class DmaPusher;
namespace Core {
class System;
-namespace Timing {
-struct EventType;
-} // namespace Timing
} // namespace Core
namespace VideoCommon::GPUThread {
@@ -89,8 +86,6 @@ struct CommandDataContainer {
struct SynchState final {
std::atomic_bool is_running{true};
- void WaitForSynchronization(u64 fence);
-
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue;
u64 last_fence{};
@@ -121,6 +116,9 @@ public:
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+ /// Wait until the GPU thread is idle.
+ void WaitIdle() const;
+
private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);
@@ -128,7 +126,6 @@ private:
private:
SynchState state;
Core::System& system;
- Core::Timing::EventType* synchronization_event{};
std::thread thread;
std::thread::id thread_id;
};
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index dbaeac6db..42031d80a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -11,6 +11,77 @@
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
namespace Tegra {
+namespace {
+enum class Operation : u32 {
+ ALU = 0,
+ AddImmediate = 1,
+ ExtractInsert = 2,
+ ExtractShiftLeftImmediate = 3,
+ ExtractShiftLeftRegister = 4,
+ Read = 5,
+ Unused = 6, // This operation doesn't seem to be a valid encoding.
+ Branch = 7,
+};
+} // Anonymous namespace
+
+enum class MacroInterpreter::ALUOperation : u32 {
+ Add = 0,
+ AddWithCarry = 1,
+ Subtract = 2,
+ SubtractWithBorrow = 3,
+ // Operations 4-7 don't seem to be valid encodings.
+ Xor = 8,
+ Or = 9,
+ And = 10,
+ AndNot = 11,
+ Nand = 12
+};
+
+enum class MacroInterpreter::ResultOperation : u32 {
+ IgnoreAndFetch = 0,
+ Move = 1,
+ MoveAndSetMethod = 2,
+ FetchAndSend = 3,
+ MoveAndSend = 4,
+ FetchAndSetMethod = 5,
+ MoveAndSetMethodFetchAndSend = 6,
+ MoveAndSetMethodSend = 7
+};
+
+enum class MacroInterpreter::BranchCondition : u32 {
+ Zero = 0,
+ NotZero = 1,
+};
+
+union MacroInterpreter::Opcode {
+ u32 raw;
+ BitField<0, 3, Operation> operation;
+ BitField<4, 3, ResultOperation> result_operation;
+ BitField<4, 1, BranchCondition> branch_condition;
+ // If set on a branch, then the branch doesn't have a delay slot.
+ BitField<5, 1, u32> branch_annul;
+ BitField<7, 1, u32> is_exit;
+ BitField<8, 3, u32> dst;
+ BitField<11, 3, u32> src_a;
+ BitField<14, 3, u32> src_b;
+ // The signed immediate overlaps the second source operand and the alu operation.
+ BitField<14, 18, s32> immediate;
+
+ BitField<17, 5, ALUOperation> alu_operation;
+
+ // Bitfield instructions data
+ BitField<17, 5, u32> bf_src_bit;
+ BitField<22, 5, u32> bf_size;
+ BitField<27, 5, u32> bf_dst_bit;
+
+ u32 GetBitfieldMask() const { // Mask with the low bf_size bits set
+ return (1U << bf_size) - 1; // Unsigned: bf_size can be 31, which overflows a signed int
+ }
+
+ s32 GetBranchTarget() const { // Immediate (counted in u32 words) scaled to bytes
+ return static_cast<s32>(immediate * sizeof(u32));
+ }
+};
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 76b6a895b..631146d89 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -6,7 +6,6 @@
#include <array>
#include <optional>
-#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -28,75 +27,11 @@ public:
void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
private:
- enum class Operation : u32 {
- ALU = 0,
- AddImmediate = 1,
- ExtractInsert = 2,
- ExtractShiftLeftImmediate = 3,
- ExtractShiftLeftRegister = 4,
- Read = 5,
- Unused = 6, // This operation doesn't seem to be a valid encoding.
- Branch = 7,
- };
-
- enum class ALUOperation : u32 {
- Add = 0,
- AddWithCarry = 1,
- Subtract = 2,
- SubtractWithBorrow = 3,
- // Operations 4-7 don't seem to be valid encodings.
- Xor = 8,
- Or = 9,
- And = 10,
- AndNot = 11,
- Nand = 12
- };
-
- enum class ResultOperation : u32 {
- IgnoreAndFetch = 0,
- Move = 1,
- MoveAndSetMethod = 2,
- FetchAndSend = 3,
- MoveAndSend = 4,
- FetchAndSetMethod = 5,
- MoveAndSetMethodFetchAndSend = 6,
- MoveAndSetMethodSend = 7
- };
+ enum class ALUOperation : u32;
+ enum class BranchCondition : u32;
+ enum class ResultOperation : u32;
- enum class BranchCondition : u32 {
- Zero = 0,
- NotZero = 1,
- };
-
- union Opcode {
- u32 raw;
- BitField<0, 3, Operation> operation;
- BitField<4, 3, ResultOperation> result_operation;
- BitField<4, 1, BranchCondition> branch_condition;
- BitField<5, 1, u32>
- branch_annul; // If set on a branch, then the branch doesn't have a delay slot.
- BitField<7, 1, u32> is_exit;
- BitField<8, 3, u32> dst;
- BitField<11, 3, u32> src_a;
- BitField<14, 3, u32> src_b;
- // The signed immediate overlaps the second source operand and the alu operation.
- BitField<14, 18, s32> immediate;
-
- BitField<17, 5, ALUOperation> alu_operation;
-
- // Bitfield instructions data
- BitField<17, 5, u32> bf_src_bit;
- BitField<22, 5, u32> bf_size;
- BitField<27, 5, u32> bf_dst_bit;
-
- u32 GetBitfieldMask() const {
- return (1 << bf_size) - 1;
- }
-
- s32 GetBranchTarget() const {
- return static_cast<s32>(immediate * sizeof(u32));
- }
- };
+ union Opcode;
union MethodAddress {
u32 raw;
@@ -149,9 +84,10 @@ private:
Engines::Maxwell3D& maxwell3d;
- u32 pc; ///< Current program counter
- std::optional<u32>
- delayed_pc; ///< Program counter to execute at after the delay slot is executed.
+ /// Current program counter
+ u32 pc;
+ /// Program counter to execute at after the delay slot is executed.
+ std::optional<u32> delayed_pc;
static constexpr std::size_t NumMacroRegisters = 8;
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index ab71870ab..2f2fe6859 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -93,6 +93,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::DXT23_SRGB>,
MortonCopy<true, PixelFormat::DXT45_SRGB>,
MortonCopy<true, PixelFormat::BC7U_SRGB>,
+ MortonCopy<true, PixelFormat::R4G4B4A4U>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
@@ -101,6 +102,17 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X6>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X10>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_12X12>,
+ MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X6>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
+ MortonCopy<true, PixelFormat::E5B9G9R9F>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
@@ -162,6 +174,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::DXT23_SRGB>,
MortonCopy<false, PixelFormat::DXT45_SRGB>,
MortonCopy<false, PixelFormat::BC7U_SRGB>,
+ MortonCopy<false, PixelFormat::R4G4B4A4U>,
nullptr,
nullptr,
nullptr,
@@ -170,6 +183,17 @@ static constexpr ConversionArray linear_to_morton_fns = {
nullptr,
nullptr,
nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ MortonCopy<false, PixelFormat::E5B9G9R9F>,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
new file mode 100644
index 000000000..b230dcc18
--- /dev/null
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -0,0 +1,63 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <mutex>
+
+#include <boost/icl/interval_map.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/rasterizer_accelerated.h"
+
+namespace VideoCore {
+
+namespace {
+
+template <typename Map, typename Interval>
+constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
+ return boost::make_iterator_range(map.equal_range(interval));
+}
+
+} // Anonymous namespace
+
+RasterizerAccelerated::RasterizerAccelerated() = default;
+
+RasterizerAccelerated::~RasterizerAccelerated() = default;
+
+void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+ std::lock_guard lock{pages_mutex};
+ const u64 page_start{addr >> Memory::PAGE_BITS};
+ const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
+
+ // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
+ // subtract after iterating
+ const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end);
+ if (delta > 0) {
+ cached_pages.add({pages_interval, delta});
+ }
+
+ for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
+ const auto interval = pair.first & pages_interval;
+ const int count = pair.second;
+
+ const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
+ const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
+ const u64 interval_size = interval_end_addr - interval_start_addr;
+
+ if (delta > 0 && count == delta) {
+ Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
+ } else if (delta < 0 && count == -delta) {
+ Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
+ } else {
+ ASSERT(count >= 0);
+ }
+ }
+
+ if (delta < 0) {
+ cached_pages.add({pages_interval, delta});
+ }
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
new file mode 100644
index 000000000..8f7e3547e
--- /dev/null
+++ b/src/video_core/rasterizer_accelerated.h
@@ -0,0 +1,31 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <mutex>
+
+#include <boost/icl/interval_map.hpp>
+
+#include "common/common_types.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCore {
+
+/// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface.
+class RasterizerAccelerated : public RasterizerInterface {
+public:
+ explicit RasterizerAccelerated();
+ ~RasterizerAccelerated() override;
+
+ void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
+
+private:
+ using CachedPageMap = boost::icl::interval_map<u64, int>;
+ CachedPageMap cached_pages;
+
+ std::mutex pages_mutex;
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index f8a807c84..0375fca17 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,13 +8,17 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
@@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz
CachedBufferBlock::~CachedBufferBlock() = default;
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- std::size_t stream_size)
- : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
- rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
+ const Device& device, std::size_t stream_size)
+ : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
+ if (!device.HasFastBufferSubData()) {
+ return;
+ }
+
+ static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
+ glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+ for (const GLuint cbuf : cbufs) {
+ glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
+ }
+}
-OGLBufferCache::~OGLBufferCache() = default;
+OGLBufferCache::~OGLBufferCache() {
+ glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+}
Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cache_addr, size);
@@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
static_cast<GLsizeiptr>(size));
}
+OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
+ std::size_t size) {
+ DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
+ const GLuint& cbuf = cbufs[cbuf_cursor++];
+ glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
+ return {&cbuf, 0};
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 022e7bfa9..8c7145443 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,10 +4,12 @@
#pragma once
+#include <array>
#include <memory>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -18,12 +20,14 @@ class System;
namespace OpenGL {
+class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class CachedBufferBlock;
using Buffer = std::shared_ptr<CachedBufferBlock>;
+using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class CachedBufferBlock : public VideoCommon::BufferBlock {
public:
@@ -38,14 +42,18 @@ private:
OGLBuffer gl_buffer{};
};
-class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
+class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- std::size_t stream_size);
+ const Device& device, std::size_t stream_size);
~OGLBufferCache();
const GLuint* GetEmptyBuffer(std::size_t) override;
+ void Acquire() noexcept {
+ cbuf_cursor = 0;
+ }
+
protected:
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
@@ -61,6 +69,14 @@ protected:
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
+
+ BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+
+private:
+ std::size_t cbuf_cursor = 0;
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+ Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
+ cbufs{}; // Zeroed: the destructor deletes these even when the constructor never created them
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 64de7e425..b30d5be74 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -51,19 +51,24 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
} // Anonymous namespace
Device::Device() {
+ const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::vector extensions = GetExtensions();
+ const bool is_nvidia = vendor == "NVIDIA Corporation";
+
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
+ has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = TestComponentIndexingBug();
has_precise_bug = TestPreciseBug();
+ has_fast_buffer_sub_data = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
@@ -75,6 +80,7 @@ Device::Device(std::nullptr_t) {
max_vertex_attributes = 16;
max_varyings = 15;
has_warp_intrinsics = true;
+ has_shader_ballot = true;
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
has_variable_aoffi = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bb273c3d6..6c86fe207 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -34,6 +34,10 @@ public:
return has_warp_intrinsics;
}
+ bool HasShaderBallot() const {
+ return has_shader_ballot;
+ }
+
bool HasVertexViewportLayer() const {
return has_vertex_viewport_layer;
}
@@ -54,6 +58,10 @@ public:
return has_precise_bug;
}
+ bool HasFastBufferSubData() const {
+ return has_fast_buffer_sub_data;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestComponentIndexingBug();
@@ -64,11 +72,13 @@ private:
u32 max_vertex_attributes{};
u32 max_varyings{};
bool has_warp_intrinsics{};
+ bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
+ bool has_fast_buffer_sub_data{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6a17bed72..05f8e511b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -67,9 +67,7 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
- system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
- OpenGLState::ApplyDefaultState();
-
+ system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
@@ -259,10 +257,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
continue;
}
- const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
-
GLShader::MaxwellUniformData ubo{};
- ubo.SetFromRegs(gpu, stage);
+ ubo.SetFromRegs(gpu);
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
@@ -271,10 +267,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
Shader shader{shader_cache.GetStageProgram(program)};
- const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
- SetupDrawConstBuffers(stage_enum, shader);
- SetupDrawGlobalMemory(stage_enum, shader);
- const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
+ // Stage indices are 0 - 5
+ const auto stage = static_cast<Maxwell::ShaderStage>(index == 0 ? 0 : index - 1);
+ SetupDrawConstBuffers(stage, shader);
+ SetupDrawGlobalMemory(stage, shader);
+ const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
@@ -342,41 +339,6 @@ std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
}
-template <typename Map, typename Interval>
-static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
- return boost::make_iterator_range(map.equal_range(interval));
-}
-
-void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
- const u64 page_start{addr >> Memory::PAGE_BITS};
- const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
-
- // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
- // subtract after iterating
- const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end);
- if (delta > 0)
- cached_pages.add({pages_interval, delta});
-
- for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
- const auto interval = pair.first & pages_interval;
- const int count = pair.second;
-
- const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
- const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
- const u64 interval_size = interval_end_addr - interval_start_addr;
-
- if (delta > 0 && count == delta)
- Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
- else if (delta < 0 && count == -delta)
- Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
- else
- ASSERT(count >= 0);
- }
-
- if (delta < 0)
- cached_pages.add({pages_interval, delta});
-}
-
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_cache.LoadDiskCache(stop_loading, callback);
@@ -412,7 +374,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
fbkey.colors[index] = std::move(color_surface);
}
- fbkey.colors_count = regs.rt_control.count;
+ fbkey.colors_count = static_cast<u16>(regs.rt_control.count);
if (depth_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
@@ -595,6 +557,8 @@ void RasterizerOpenGL::DrawPrelude() {
SyncPolygonOffset();
SyncAlphaTest();
+ buffer_cache.Acquire();
+
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -916,7 +880,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
const auto alignment = device.GetUniformBufferAlignment();
- const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
+ const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
+ device.HasFastBufferSubData());
bind_ubo_pushbuffer.Push(cbuf, offset, size);
}
@@ -968,14 +933,14 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- const auto texture = [&]() {
+ const auto texture = [&] {
if (!entry.IsBindless()) {
return maxwell3d.GetStageTexture(stage, entry.GetOffset());
}
- const auto cbuf = entry.GetBindlessCBuf();
- Tegra::Texture::TextureHandle tex_handle;
- tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
- return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
+ const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
+ const Tegra::Texture::TextureHandle tex_handle =
+ maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
+ return maxwell3d.GetTextureInfo(tex_handle);
}();
if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
@@ -998,14 +963,13 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- const auto texture = [&]() {
+ const auto texture = [&] {
if (!entry.IsBindless()) {
return compute.GetTexture(entry.GetOffset());
}
- const auto cbuf = entry.GetBindlessCBuf();
- Tegra::Texture::TextureHandle tex_handle;
- tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
- return compute.GetTextureInfo(tex_handle, entry.GetOffset());
+ const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
+ Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
+ return compute.GetTextureInfo(tex_handle);
}();
if (SetupTexture(bindpoint, texture, entry)) {
@@ -1043,14 +1007,13 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
const auto& entries = shader->GetShaderEntries().images;
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- const auto tic = [&]() {
+ const auto tic = [&] {
if (!entry.IsBindless()) {
return compute.GetTexture(entry.GetOffset()).tic;
}
- const auto cbuf = entry.GetBindlessCBuf();
- Tegra::Texture::TextureHandle tex_handle;
- tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
- return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
+ const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
+ Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
+ return compute.GetTextureInfo(tex_handle).tic;
}();
SetupImage(bindpoint, tic, entry);
}
@@ -1091,6 +1054,15 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
+
+ bool flip_y = false;
+ if (regs.viewport_transform[0].scale_y < 0.0) {
+ flip_y = !flip_y;
+ }
+ if (regs.screen_y_control.y_negate != 0) {
+ flip_y = !flip_y;
+ }
+ state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
}
void RasterizerOpenGL::SyncClipEnabled(
@@ -1113,28 +1085,14 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- auto& maxwell3d = system.GPU().Maxwell3D();
-
- const auto& regs = maxwell3d.regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
- state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
-
- const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
- regs.viewport_transform[0].scale_y < 0.0f};
-
- // If the GPU is configured to flip the rasterized triangles, then we need to flip the
- // notion of front and back. Note: We flip the triangles when the value of the register is 0
- // because OpenGL already does it for us.
- if (flip_triangles) {
- if (state.cull.front_face == GL_CCW)
- state.cull.front_face = GL_CW;
- else if (state.cull.front_face == GL_CW)
- state.cull.front_face = GL_CCW;
- }
}
+
+ state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
@@ -1340,7 +1298,9 @@ void RasterizerOpenGL::SyncPolygonOffset() {
state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
- state.polygon_offset.units = regs.polygon_offset_units;
+
+ // Hardware divides polygon offset units by two
+ state.polygon_offset.units = regs.polygon_offset_units / 2.0f;
state.polygon_offset.factor = regs.polygon_offset_factor;
state.polygon_offset.clamp = regs.polygon_offset_clamp;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9c10ebda3..bd6fe5c3a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -13,12 +13,12 @@
#include <tuple>
#include <utility>
-#include <boost/icl/interval_map.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -51,7 +51,7 @@ namespace OpenGL {
struct ScreenInfo;
struct DrawParameters;
-class RasterizerOpenGL : public VideoCore::RasterizerInterface {
+class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info);
@@ -72,7 +72,6 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -227,9 +226,6 @@ private:
AccelDraw accelerate_draw = AccelDraw::Disabled;
OGLFramebuffer clear_framebuffer;
-
- using CachedPageMap = boost::icl::interval_map<u64, int>;
- CachedPageMap cached_pages;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 42ca3b1bd..04a239a39 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,13 +3,16 @@
// Refer to the license.txt file included.
#include <mutex>
+#include <optional>
+#include <string>
#include <thread>
+#include <unordered_set>
#include <boost/functional/hash.hpp>
#include "common/assert.h"
-#include "common/hash.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
+#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -21,18 +24,20 @@
namespace OpenGL {
+using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode;
+using VideoCommon::Shader::ShaderIR;
+
+namespace {
// One UBO is always reserved for emulation values on staged shaders
constexpr u32 STAGE_RESERVED_UBOS = 1;
-struct UnspecializedShader {
- std::string code;
- GLShader::ShaderEntries entries;
- ProgramType program_type;
-};
+constexpr u32 STAGE_MAIN_OFFSET = 10;
+constexpr u32 KERNEL_MAIN_OFFSET = 0;
-namespace {
+constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
@@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program)
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
+/// Returns true when the instruction at `offset` is a scheduler instruction, judged by its distance from `main_offset`
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+ // Sched instructions appear once every 4 instructions.
+ constexpr std::size_t SchedPeriod = 4;
+ const std::size_t absolute_offset = offset - main_offset; // unsigned subtraction: wraps around if offset < main_offset
+ return (absolute_offset % SchedPeriod) == 0; // the word at main_offset itself counts as a sched instruction
+}
+
+/// Calculates the size of a program stream as a count of 64-bit words (an element count of `program`, not bytes)
+std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+ constexpr std::size_t start_offset = 10; // NOTE(review): equals STAGE_MAIN_OFFSET; kernels use KERNEL_MAIN_OFFSET (0) elsewhere - confirm this is right for compute code
+ // This is the encoded version of BRA that jumps to itself. All Nvidia
+ // shaders end with one.
+ constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+ constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; // ignores bit 23 when comparing against self_jumping_branch
+ std::size_t offset = start_offset;
+ while (offset < program.size()) {
+ const u64 instruction = program[offset];
+ if (!IsSchedInstruction(offset, start_offset)) { // sched words are skipped and never end the scan
+ if ((instruction & mask) == self_jumping_branch) {
+ // Stop at the self-jumping branch described above
+ break;
+ }
+ if (instruction == 0) { // an all-zero word also ends the scan
+ break;
+ }
+ }
+ offset++;
+ }
+ // The last instruction is included in the program size
+ return std::min(offset + 1, program.size()); // clamped so the result never exceeds the number of words available
+}
+
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
const u8* host_ptr) {
@@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
});
memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
program_code.size() * sizeof(u64));
+ program_code.resize(CalculateProgramSize(program_code));
return program_code;
}
@@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) {
}
}
-/// Gets if the current instruction offset is a scheduler instruction
-constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
- // Sched instructions appear once every 4 instructions.
- constexpr std::size_t SchedPeriod = 4;
- const std::size_t absolute_offset = offset - main_offset;
- return (absolute_offset % SchedPeriod) == 0;
-}
-
/// Describes primitive behavior on geometry shaders
constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
switch (primitive_mode) {
@@ -121,110 +152,151 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) {
return {};
}
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
- constexpr std::size_t start_offset = 10;
- // This is the encoded version of BRA that jumps to itself. All Nvidia
- // shaders end with one.
- constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
- constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
- std::size_t offset = start_offset;
- std::size_t size = start_offset * sizeof(u64);
- while (offset < program.size()) {
- const u64 instruction = program[offset];
- if (!IsSchedInstruction(offset, start_offset)) {
- if ((instruction & mask) == self_jumping_branch) {
- // End on Maxwell's "nop" instruction
- break;
- }
- if (instruction == 0) {
- break;
- }
- }
- size += sizeof(u64);
- offset++;
- }
- // The last instruction is included in the program size
- return std::min(size + sizeof(u64), program.size() * sizeof(u64));
-}
-
/// Hashes one (or two) program streams
u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
- const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
- if (size_a == 0) {
- size_a = CalculateProgramSize(code);
- }
- u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
- if (program_type != ProgramType::VertexA) {
- return unique_identifier;
- }
- // VertexA programs include two programs
-
- std::size_t seed = 0;
- boost::hash_combine(seed, unique_identifier);
-
- if (size_b == 0) {
- size_b = CalculateProgramSize(code_b);
+ const ProgramCode& code_b) { // sizes are no longer passed in: the full contents of each vector are hashed
+ u64 unique_identifier = boost::hash_value(code); // hashes every word held in `code`
+ if (program_type == ProgramType::VertexA) {
+ // VertexA programs include two programs, so the second stream is mixed into the identifier
+ boost::hash_combine(unique_identifier, boost::hash_value(code_b)); // NOTE(review): hash_combine's seed is std::size_t&; this relies on u64 being that type - confirm for all targets
}
- const u64 identifier_b =
- Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
- boost::hash_combine(seed, identifier_b);
- return static_cast<u64>(seed);
+ return unique_identifier; // for every other program type this is the hash of `code` alone
}
/// Creates an unspecialized program from code streams
-GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
- ProgramCode program_code, ProgramCode program_code_b) {
- GLShader::ShaderSetup setup(program_code);
- setup.program.size_a = CalculateProgramSize(program_code);
- setup.program.size_b = 0;
- if (program_type == ProgramType::VertexA) {
- // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
- // Conventional HW does not support this, so we combine VertexA and VertexB into one
- // stage here.
- setup.SetProgramB(program_code_b);
- setup.program.size_b = CalculateProgramSize(program_code_b);
- }
- setup.program.unique_identifier = GetUniqueIdentifier(
- program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
-
+std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, // dispatches to the per-stage GLSL generator and returns its source string
+ const std::optional<ShaderIR>& ir_b) { // ir_b is only forwarded on the vertex path below
switch (program_type) {
case ProgramType::VertexA:
case ProgramType::VertexB:
- return GLShader::GenerateVertexShader(device, setup);
+ return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); // optional second IR handed over as a nullable pointer
case ProgramType::Geometry:
- return GLShader::GenerateGeometryShader(device, setup);
+ return GLShader::GenerateGeometryShader(device, ir);
case ProgramType::Fragment:
- return GLShader::GenerateFragmentShader(device, setup);
+ return GLShader::GenerateFragmentShader(device, ir);
case ProgramType::Compute:
- return GLShader::GenerateComputeShader(device, setup);
+ return GLShader::GenerateComputeShader(device, ir);
default:
UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
return {};
}
}
-CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
- ProgramType program_type, const ProgramVariant& variant,
- bool hint_retrievable = false) {
+constexpr const char* GetProgramTypeName(ProgramType program_type) { // short stage tag; GetShaderId uses it as the prefix of a shader id
+ switch (program_type) {
+ case ProgramType::VertexA:
+ case ProgramType::VertexB: // both vertex program types share the "VS" tag
+ return "VS";
+ case ProgramType::TessellationControl:
+ return "TCS";
+ case ProgramType::TessellationEval:
+ return "TES";
+ case ProgramType::Geometry:
+ return "GS";
+ case ProgramType::Fragment:
+ return "FS";
+ case ProgramType::Compute:
+ return "CS";
+ }
+ return "UNK"; // reached only when program_type matches none of the cases above
+}
+
+Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { // converts a ProgramType into the corresponding Tegra::Engines::ShaderType
+ switch (program_type) {
+ case ProgramType::VertexA:
+ case ProgramType::VertexB: // VertexA and VertexB both map to ShaderType::Vertex
+ return Tegra::Engines::ShaderType::Vertex;
+ case ProgramType::TessellationControl:
+ return Tegra::Engines::ShaderType::TesselationControl;
+ case ProgramType::TessellationEval:
+ return Tegra::Engines::ShaderType::TesselationEval;
+ case ProgramType::Geometry:
+ return Tegra::Engines::ShaderType::Geometry;
+ case ProgramType::Fragment:
+ return Tegra::Engines::ShaderType::Fragment;
+ case ProgramType::Compute:
+ return Tegra::Engines::ShaderType::Compute;
+ }
+ UNREACHABLE(); // only hit when program_type matches none of the cases above
+ return {}; // value-initialized ShaderType returned after the UNREACHABLE() above
+}
+
+std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { // builds a readable id: stage tag followed by the identifier
+ return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); // identifier is printed as 16 zero-padded upper-case hex digits
+}
+
+Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( // picks the engine object that MakeLocker hands to a ConstBufferLocker
+ Core::System& system, ProgramType program_type) {
+ if (program_type == ProgramType::Compute) {
+ return system.GPU().KeplerCompute(); // compute programs use the KeplerCompute engine
+ } else {
+ return system.GPU().Maxwell3D(); // every other program type uses the Maxwell3D engine
+ }
+}
+
+std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) { // creates a heap-allocated locker for the given program type
+ return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type), // engine-side shader type
+ GetConstBufferEngineInterface(system, program_type)); // KeplerCompute for compute, Maxwell3D otherwise
+}
+
+void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { // inserts the keys and samplers recorded in `usage` into `locker`
+ for (const auto& key : usage.keys) {
+ const auto [buffer, offset] = key.first; // key.first unpacks into (buffer, offset); key.second is the value stored for it
+ locker.InsertKey(buffer, offset, key.second);
+ }
+ for (const auto& [offset, sampler] : usage.bound_samplers) { // bound samplers are keyed by offset only
+ locker.InsertBoundSampler(offset, sampler);
+ }
+ for (const auto& [key, sampler] : usage.bindless_samplers) { // bindless samplers are keyed by (buffer, offset)
+ const auto [buffer, offset] = key;
+ locker.InsertBindlessSampler(buffer, offset, sampler);
+ }
+}
+
+CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,
+ const ProgramCode& program_code, const ProgramCode& program_code_b,
+ const ProgramVariant& variant, ConstBufferLocker& locker,
+ bool hint_retrievable = false) {
+ LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type));
+
+ const bool is_compute = program_type == ProgramType::Compute;
+ const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+ const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker);
+ std::optional<ShaderIR> ir_b;
+ if (!program_code_b.empty()) {
+ ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker);
+ }
+ const auto entries = GLShader::GetEntries(ir);
+
auto base_bindings{variant.base_bindings};
const auto primitive_mode{variant.primitive_mode};
const auto texture_buffer_usage{variant.texture_buffer_usage};
- std::string source = R"(#version 430 core
+ std::string source = fmt::format(R"(// {}
+#version 430 core
#extension GL_ARB_separate_shader_objects : enable
-#extension GL_ARB_shader_viewport_layer_array : enable
-#extension GL_EXT_shader_image_load_formatted : enable
-#extension GL_NV_gpu_shader5 : enable
-#extension GL_NV_shader_thread_group : enable
-#extension GL_NV_shader_thread_shuffle : enable
-)";
- if (program_type == ProgramType::Compute) {
+)",
+ GetShaderId(unique_identifier, program_type));
+ if (is_compute) {
source += "#extension GL_ARB_compute_variable_group_size : require\n";
}
+ if (device.HasShaderBallot()) {
+ source += "#extension GL_ARB_shader_ballot : require\n";
+ }
+ if (device.HasVertexViewportLayer()) {
+ source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
+ }
+ if (device.HasImageLoadFormatted()) {
+ source += "#extension GL_EXT_shader_image_load_formatted : require\n";
+ }
+ if (device.HasWarpIntrinsics()) {
+ source += "#extension GL_NV_gpu_shader5 : require\n"
+ "#extension GL_NV_shader_thread_group : require\n"
+ "#extension GL_NV_shader_thread_shuffle : require\n";
+ }
source += '\n';
- if (program_type != ProgramType::Compute) {
+ if (!is_compute) {
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
}
@@ -268,7 +340,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
}
source += '\n';
- source += code;
+ source += GenerateGLSL(device, program_type, ir, ir_b);
OGLShader shader;
shader.Create(source.c_str(), GetShaderType(program_type));
@@ -278,85 +350,99 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
return program;
}
-std::set<GLenum> GetSupportedFormats() {
- std::set<GLenum> supported_formats;
-
+std::unordered_set<GLenum> GetSupportedFormats() { // collects the values reported for GL_PROGRAM_BINARY_FORMATS into a set
GLint num_formats{};
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
- for (const GLint format : formats)
+ std::unordered_set<GLenum> supported_formats; // now declared right before it is filled
+ for (const GLint format : formats) { // braces added; the loop body is unchanged
supported_formats.insert(static_cast<GLenum>(format));
+ }
return supported_formats;
}
} // Anonymous namespace
CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
- GLShader::ProgramResult result)
- : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
- unique_identifier{params.unique_identifier}, program_type{program_type},
- disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
- entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
+ GLShader::ShaderEntries entries, ProgramCode program_code, // the shader now keeps its raw program code instead of generated source
+ ProgramCode program_code_b)
+ : RasterizerCacheObject{params.host_ptr}, system{params.system},
+ disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
+ unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries},
+ program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {
+ if (!params.precompiled_variants) { // null means nothing was precompiled for this shader
+ return;
+ }
+ for (const auto& pair : *params.precompiled_variants) { // each element is an iterator to a (usage, program) entry
+ auto locker = MakeLocker(system, program_type);
+ const auto& usage = pair->first;
+ FillLocker(*locker, usage); // rebuild the locker state recorded in this usage
+
+ std::unique_ptr<LockerVariant>* locker_variant = nullptr;
+ const auto it = // look for an existing variant whose locker has the same keys
+ std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
+ return variant->locker->HasEqualKeys(*locker);
+ });
+ if (it == locker_variants.end()) { // none found: append a new variant that takes ownership of this locker
+ locker_variant = &locker_variants.emplace_back();
+ *locker_variant = std::make_unique<LockerVariant>();
+ locker_variant->get()->locker = std::move(locker);
+ } else {
+ locker_variant = &*it; // reuse the matching variant; the freshly built locker is discarded
+ }
+ locker_variant->get()->programs.emplace(usage.variant, pair->second); // register the precompiled program under its ProgramVariant
+ }
+}
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
- ProgramCode&& program_code,
- ProgramCode&& program_code_b) {
- const auto code_size{CalculateProgramSize(program_code)};
- const auto code_size_b{CalculateProgramSize(program_code_b)};
- auto result{
- CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
- if (result.first.empty()) {
- // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
- return {};
- }
-
+ ProgramCode program_code, ProgramCode program_code_b) { // code is now taken by value instead of by rvalue reference
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
- params.unique_identifier, GetProgramType(program_type),
- static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
- std::move(program_code), std::move(program_code_b)));
-
- return std::shared_ptr<CachedShader>(
- new CachedShader(params, GetProgramType(program_type), std::move(result)));
-}
-
-Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- GLShader::ProgramResult result) {
+ params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); // passed as lvalues here; both are moved only in the return below
+
+ ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)), // local locker, used only to build the IR below
+ params.system.GPU().Maxwell3D());
+ const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
+ // TODO(Rodrigo): Handle VertexA shaders (for now the entries come from program_code only)
+ // std::optional<ShaderIR> ir_b;
+ // if (!program_code_b.empty()) {
+ // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET);
+ // }
return std::shared_ptr<CachedShader>(
- new CachedShader(params, GetProgramType(program_type), std::move(result)));
+ new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir),
+ std::move(program_code), std::move(program_code_b)));
}
-Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
- auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
+Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { // code is now taken by value instead of by rvalue reference
+ params.disk_cache.SaveRaw(
+ ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); // passed as an lvalue; `code` is moved only in the return below
- const auto code_size{CalculateProgramSize(code)};
- params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
- static_cast<u32>(code_size / sizeof(u64)), 0,
- std::move(code), {}));
-
- return std::shared_ptr<CachedShader>(
- new CachedShader(params, ProgramType::Compute, std::move(result)));
+ ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, // local locker, used only to build the IR below
+ params.system.GPU().KeplerCompute());
+ const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); // kernels are decoded from KERNEL_MAIN_OFFSET
+ return std::shared_ptr<CachedShader>(new CachedShader(
+ params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); // kernels carry no second program
}
-Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
- GLShader::ProgramResult result) {
- return std::shared_ptr<CachedShader>(
- new CachedShader(params, ProgramType::Compute, std::move(result)));
+Shader CachedShader::CreateFromCache(const ShaderParameters& params, // single factory now used for both stages and kernels loaded from the cache
+ const UnspecializedShader& unspecialized) {
+ return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, // program type comes from the cached record
+ unspecialized.entries, unspecialized.code, // entries and code are copied out of the cached record
+ unspecialized.code_b));
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
- const auto [entry, is_cache_miss] = programs.try_emplace(variant);
+ UpdateVariant();
+
+ const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant);
auto& program = entry->second;
if (is_cache_miss) {
- program = TryLoadProgram(variant);
- if (!program) {
- program = SpecializeShader(code, entries, program_type, variant);
- disk_cache.SaveUsage(GetUsage(variant));
- }
+ program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b,
+ variant, *curr_variant->locker);
+ disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker));
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
}
@@ -372,18 +458,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
return {program->handle, base_bindings};
}
-CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
- const auto found = precompiled_programs.find(GetUsage(variant));
- if (found == precompiled_programs.end()) {
- return {};
+void CachedShader::UpdateVariant() { // leaves curr_variant pointing at a locker variant, creating one if needed
+ if (curr_variant && !curr_variant->locker->IsConsistent()) {
+ curr_variant = nullptr; // the current variant's locker no longer reports itself consistent
+ }
+ if (!curr_variant) {
+ for (auto& variant : locker_variants) {
+ if (variant->locker->IsConsistent()) {
+ curr_variant = variant.get(); // no break: the last consistent entry wins. NOTE(review): confirm at most one can match
+ }
+ }
+ }
+ if (!curr_variant) { // no existing variant is consistent: append one with a fresh locker
+ auto& new_variant = locker_variants.emplace_back();
+ new_variant = std::make_unique<LockerVariant>();
+ new_variant->locker = MakeLocker(system, program_type);
+ curr_variant = new_variant.get();
}
- return found->second;
}
-ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
+ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, // builds the usage record passed to disk_cache.SaveUsage
+ const ConstBufferLocker& locker) const {
ShaderDiskCacheUsage usage;
usage.unique_identifier = unique_identifier;
usage.variant = variant;
+ usage.keys = locker.GetKeys(); // these three fields are the ones FillLocker reads back
+ usage.bound_samplers = locker.GetBoundSamplers();
+ usage.bindless_samplers = locker.GetBindlessSamplers();
return usage;
}
@@ -399,18 +500,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
const auto [raws, shader_usages] = *transferable;
-
- auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
-
- const auto supported_formats{GetSupportedFormats()};
- const auto unspecialized_shaders{
- GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
- if (stop_loading) {
+ if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
return;
}
- // Track if precompiled cache was altered during loading to know if we have to serialize the
- // virtual precompiled cache file back to the hard drive
+ const auto dumps = disk_cache.LoadPrecompiled();
+ const auto supported_formats = GetSupportedFormats();
+
+ // Track if precompiled cache was altered during loading to know if we have to
+ // serialize the virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false;
// Inform the frontend about shader build initialization
@@ -433,9 +531,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
const auto& usage{shader_usages[i]};
- LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
- usage.unique_identifier, i, shader_usages.size());
-
const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
const auto dump{dumps.find(usage)};
@@ -449,21 +544,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
if (!shader) {
- shader = SpecializeShader(unspecialized.code, unspecialized.entries,
- unspecialized.program_type, usage.variant, true);
+ auto locker{MakeLocker(system, unspecialized.program_type)};
+ FillLocker(*locker, usage);
+ shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type,
+ unspecialized.code, unspecialized.code_b, usage.variant,
+ *locker, true);
}
- std::scoped_lock lock(mutex);
+ std::scoped_lock lock{mutex};
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
shader_usages.size());
}
precompiled_programs.emplace(usage, std::move(shader));
+
+ // TODO(Rodrigo): Is there a better way to do this?
+ precompiled_variants[usage.unique_identifier].push_back(
+ precompiled_programs.find(usage));
}
};
- const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)};
+ const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
const std::size_t bucket_size{shader_usages.size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
@@ -483,7 +585,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (compilation_failed) {
// Invalidate the precompiled cache if a dumped shader was rejected
disk_cache.InvalidatePrecompiled();
- dumps.clear();
precompiled_cache_altered = true;
return;
}
@@ -491,8 +592,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
- // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
- // precompiling them
+ // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
+ // before precompiling them
for (std::size_t i = 0; i < shader_usages.size(); ++i) {
const auto& usage{shader_usages[i]};
@@ -508,9 +609,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
-CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
- const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
+const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { // looks up the variants registered for a shader id
+ const auto it = precompiled_variants.find(unique_identifier);
+ return it == precompiled_variants.end() ? nullptr : &it->second; // nullptr when the id is unknown; otherwise a pointer to the stored entry, not a copy
+}
+CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
+ const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
return {};
@@ -532,56 +637,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
return shader;
}
-std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
+bool ShaderCacheOpenGL::GenerateUnspecializedShaders( // fills the unspecialized_shaders member; returns false if loading stops or a hash check fails
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
- const std::vector<ShaderDiskCacheRaw>& raws,
- const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
- std::unordered_map<u64, UnspecializedShader> unspecialized;
-
+ const std::vector<ShaderDiskCacheRaw>& raws) { // the decompiled-shader map parameter is gone
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
}
for (std::size_t i = 0; i < raws.size(); ++i) {
if (stop_loading) {
- return {};
+ return false; // loading was stopped through stop_loading
}
const auto& raw{raws[i]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash{
GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
if (unique_identifier != calculated_hash) {
- LOG_ERROR(
- Render_OpenGL,
- "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
- raw.GetUniqueIdentifier(), calculated_hash);
+ LOG_ERROR(Render_OpenGL,
+ "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
+ "removing shader cache",
+ raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateTransferable();
- return {};
+ return false; // stored and recomputed hashes differ; the transferable cache was invalidated above
}
- GLShader::ProgramResult result;
- if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
- // If it's stored in the precompiled file, avoid decompiling it here
- const auto& stored_decompiled{it->second};
- result = {stored_decompiled.code, stored_decompiled.entries};
- } else {
- // Otherwise decompile the shader at boot and save the result to the decompiled file
- result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(),
- raw.GetProgramCodeB());
- disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
- }
-
- precompiled_shaders.insert({unique_identifier, result});
-
- unspecialized.insert(
- {raw.GetUniqueIdentifier(),
- {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
+ const u32 main_offset = // kernels and staged shaders are decoded from different offsets
+ raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+ ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); // constructed without an engine interface, unlike MakeLocker
+ const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker);
+ // TODO(Rodrigo): Handle VertexA shaders (for now the entries come from the main program code only)
+ // std::optional<ShaderIR> ir_b;
+ // if (raw.HasProgramA()) {
+ // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
+ // }
+
+ UnspecializedShader unspecialized;
+ unspecialized.entries = GLShader::GetEntries(ir);
+ unspecialized.program_type = raw.GetProgramType();
+ unspecialized.code = raw.GetProgramCode(); // the raw code is kept so GLSL can be generated later, per variant
+ unspecialized.code_b = raw.GetProgramCodeB();
+ unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); // stored by copy in the member map
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
}
}
- return unspecialized;
+ return true; // every raw entry was processed
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -590,37 +691,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
}
auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr program_addr{GetShaderAddress(system, program)};
+ const GPUVAddr address{GetShaderAddress(system, program)};
// Look up shader in the cache based on address
- const auto host_ptr{memory_manager.GetPointer(program_addr)};
+ const auto host_ptr{memory_manager.GetPointer(address)};
Shader shader{TryGet(host_ptr)};
if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
// No shader found - create a new one
- ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
- ProgramCode program_code_b;
- const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
- if (is_program_a) {
- const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
- program_code_b = GetShaderCode(memory_manager, program_addr_b,
- memory_manager.GetPointer(program_addr_b));
- }
-
- const auto unique_identifier =
- GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
- const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
- const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
- host_ptr, unique_identifier};
-
- const auto found = precompiled_shaders.find(unique_identifier);
- if (found == precompiled_shaders.end()) {
- shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code),
- std::move(program_code_b));
+ ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
+ ProgramCode code_b;
+ if (program == Maxwell::ShaderProgram::VertexA) {
+ const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
+ code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
+ }
+
+ const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b);
+ const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
+ const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
+ const ShaderParameters params{system, disk_cache, precompiled_variants, device,
+ cpu_addr, host_ptr, unique_identifier};
+
+ const auto found = unspecialized_shaders.find(unique_identifier);
+ if (found == unspecialized_shaders.end()) {
+ shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
+ std::move(code_b));
} else {
- shader = CachedShader::CreateStageFromCache(params, program, found->second);
+ shader = CachedShader::CreateFromCache(params, found->second);
}
Register(shader);
@@ -638,15 +737,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
// No kernel found - create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
+ const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
- const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
- host_ptr, unique_identifier};
+ const ShaderParameters params{system, disk_cache, precompiled_variants, device,
+ cpu_addr, host_ptr, unique_identifier};
- const auto found = precompiled_shaders.find(unique_identifier);
- if (found == precompiled_shaders.end()) {
+ const auto found = unspecialized_shaders.find(unique_identifier);
+ if (found == unspecialized_shaders.end()) {
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
} else {
- kernel = CachedShader::CreateKernelFromCache(params, found->second);
+ kernel = CachedShader::CreateFromCache(params, found->second);
}
Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index de195cc5d..6bd7c9cf1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -8,9 +8,10 @@
#include <atomic>
#include <bitset>
#include <memory>
-#include <set>
+#include <string>
#include <tuple>
#include <unordered_map>
+#include <unordered_set>
#include <vector>
#include <glad/glad.h>
@@ -20,6 +21,8 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/shader_ir.h"
namespace Core {
class System;
@@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>;
using CachedProgram = std::shared_ptr<OGLProgram>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
-using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
+using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; // NOTE(review): unordered_map iterators are invalidated by a rehash, and LoadDiskCache keeps emplacing into precompiled_programs after storing them - confirm this is safe
+
+struct UnspecializedShader { // per-shader record built by GenerateUnspecializedShaders and consumed by CachedShader::CreateFromCache
+ GLShader::ShaderEntries entries; // result of GLShader::GetEntries on the shader's IR
+ ProgramType program_type; // taken from the raw disk-cache entry
+ ProgramCode code; // main program code
+ ProgramCode code_b; // second program code; GetUniqueIdentifier hashes it only for VertexA
+};
struct ShaderParameters {
+ Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
- const PrecompiledPrograms& precompiled_programs;
+ const PrecompiledVariants* precompiled_variants;
const Device& device;
VAddr cpu_addr;
u8* host_ptr;
@@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject {
public:
static Shader CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
- ProgramCode&& program_code, ProgramCode&& program_code_b);
-
- static Shader CreateStageFromCache(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- GLShader::ProgramResult result);
+ ProgramCode program_code, ProgramCode program_code_b);
+ static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
- static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
-
- static Shader CreateKernelFromCache(const ShaderParameters& params,
- GLShader::ProgramResult result);
+ static Shader CreateFromCache(const ShaderParameters& params,
+ const UnspecializedShader& unspecialized);
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
- return shader_length;
+ return program_code.size() * sizeof(u64);
}
/// Gets the shader entries for the shader
@@ -83,24 +89,36 @@ public:
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
private:
+ struct LockerVariant { // Groups one ConstBufferLocker with the programs associated with it; CachedShader owns a list of these and points at the current one.
+ std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; // Owned locker for this variant.
+ std::unordered_map<ProgramVariant, CachedProgram> programs; // Programs of this variant, keyed by ProgramVariant.
+ };
+
explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
- GLShader::ProgramResult result);
+ GLShader::ShaderEntries entries, ProgramCode program_code,
+ ProgramCode program_code_b);
- CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
+ void UpdateVariant();
- ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
+ ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
+ const VideoCommon::Shader::ConstBufferLocker& locker) const;
+
+ Core::System& system;
+ ShaderDiskCacheOpenGL& disk_cache;
+ const Device& device;
VAddr cpu_addr{};
+
u64 unique_identifier{};
ProgramType program_type{};
- ShaderDiskCacheOpenGL& disk_cache;
- const PrecompiledPrograms& precompiled_programs;
GLShader::ShaderEntries entries;
- std::string code;
- std::size_t shader_length{};
- std::unordered_map<ProgramVariant, CachedProgram> programs;
+ ProgramCode program_code;
+ ProgramCode program_code_b;
+
+ LockerVariant* curr_variant = nullptr;
+ std::vector<std::unique_ptr<LockerVariant>> locker_variants;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -123,21 +141,26 @@ protected:
void FlushObjectInner(const Shader& object) override {}
private:
- std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
- const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
- const std::vector<ShaderDiskCacheRaw>& raws,
- const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
+ bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback,
+ const std::vector<ShaderDiskCacheRaw>& raws);
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
- const std::set<GLenum>& supported_formats);
+ const std::unordered_set<GLenum>& supported_formats);
+
+ const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
Core::System& system;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
+
ShaderDiskCacheOpenGL disk_cache;
- PrecompiledShaders precompiled_shaders;
PrecompiledPrograms precompiled_programs;
+ std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
+
+ std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
+
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e6b36a0f2..4f2b49170 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -19,6 +19,7 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/shader/ast.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
@@ -242,6 +243,26 @@ constexpr const char* GetTypeString(Type type) {
}
}
+/// Returns the GLSL image dimensionality suffix for an image type; DeclareImages appends it to
+/// "uimage" when declaring the uniform (for example "uimage2DArray").
+/// Unknown types hit UNREACHABLE() and fall back to "1D".
+constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
+    using Tegra::Shader::ImageType;
+
+    switch (image_type) {
+    case ImageType::Texture1D:
+        return "1D";
+    case ImageType::TextureBuffer:
+        return "Buffer";
+    case ImageType::Texture1DArray:
+        return "1DArray";
+    case ImageType::Texture2D:
+        return "2D";
+    case ImageType::Texture2DArray:
+        return "2DArray";
+    case ImageType::Texture3D:
+        return "3D";
+    default:
+        UNREACHABLE();
+        return "1D";
+    }
+}
+
/// Generates code to use for a swizzle operation.
constexpr const char* GetSwizzle(u32 element) {
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
@@ -314,39 +335,24 @@ constexpr bool IsVertexShader(ProgramType stage) {
return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
}
+class ASTDecompiler;
+class ExprDecompiler;
+
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
std::string suffix)
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
- void Decompile() {
- DeclareVertex();
- DeclareGeometry();
- DeclareRegisters();
- DeclarePredicates();
- DeclareLocalMemory();
- DeclareSharedMemory();
- DeclareInternalFlags();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareConstantBuffers();
- DeclareGlobalMemory();
- DeclareSamplers();
- DeclarePhysicalAttributeReader();
- DeclareImages();
-
- code.AddLine("void execute_{}() {{", suffix);
- ++code.scope;
-
+ void DecompileBranchMode() {
// VM's program counter
const auto first_address = ir.GetBasicBlocks().begin()->first;
code.AddLine("uint jmp_to = {}U;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
+ constexpr u32 FLOW_STACK_SIZE = 20;
if (!ir.IsFlowStackDisabled()) {
- constexpr u32 FLOW_STACK_SIZE = 20;
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
@@ -372,38 +378,47 @@ public:
code.AddLine("default: return;");
code.AddLine("}}");
- for (std::size_t i = 0; i < 2; ++i) {
- --code.scope;
- code.AddLine("}}");
+ --code.scope;
+ code.AddLine("}}");
+ }
+
+ void DecompileAST();
+
+ void Decompile() {
+ DeclareVertex();
+ DeclareGeometry();
+ DeclareRegisters();
+ DeclarePredicates();
+ DeclareLocalMemory();
+ DeclareInternalFlags();
+ DeclareInputAttributes();
+ DeclareOutputAttributes();
+ DeclareConstantBuffers();
+ DeclareGlobalMemory();
+ DeclareSamplers();
+ DeclarePhysicalAttributeReader();
+
+ code.AddLine("void execute_{}() {{", suffix);
+ ++code.scope;
+
+ if (ir.IsDecompiled()) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
}
+
+ --code.scope;
+ code.AddLine("}}");
}
std::string GetResult() {
return code.GetResult();
}
- ShaderEntries GetShaderEntries() const {
- ShaderEntries entries;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
- cbuf.first);
- }
- for (const auto& sampler : ir.GetSamplers()) {
- entries.samplers.emplace_back(sampler);
- }
- for (const auto& [offset, image] : ir.GetImages()) {
- entries.images.emplace_back(image);
- }
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
- usage.is_read, usage.is_written);
- }
- entries.clip_distances = ir.GetClipDistances();
- entries.shader_length = ir.GetLength();
- return entries;
- }
-
private:
+ friend class ASTDecompiler;
+ friend class ExprDecompiler;
+
void DeclareVertex() {
if (!IsVertexShader(stage))
return;
@@ -720,27 +735,7 @@ private:
void DeclareImages() {
const auto& images{ir.GetImages()};
- for (const auto& [offset, image] : images) {
- const char* image_type = [&] {
- switch (image.GetType()) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "Buffer";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "1DArray";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "2DArray";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- default:
- UNREACHABLE();
- return "1D";
- }
- }();
-
+ for (const auto& image : images) {
std::string qualifier = "coherent volatile";
if (image.IsRead() && !image.IsWritten()) {
qualifier += " readonly";
@@ -748,13 +743,10 @@ private:
qualifier += " writeonly";
}
- std::string format;
- if (image.IsAtomic()) {
- format = "r32ui, ";
- }
-
+ const char* format = image.IsAtomic() ? "r32ui, " : "";
+ const char* type_declaration = GetImageTypeDeclaration(image.GetType());
code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format,
- image.GetIndex(), qualifier, image_type, GetImage(image));
+ image.GetIndex(), qualifier, type_declaration, GetImage(image));
}
if (!images.empty()) {
code.AddNewLine();
@@ -1135,7 +1127,7 @@ private:
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
- } else if (std::get_if<TextureAoffi>(&variant)) {
+ } else if (std::holds_alternative<TextureAoffi>(variant)) {
expr += GenerateTextureAoffi(meta->aoffi);
} else {
UNREACHABLE();
@@ -1145,8 +1137,8 @@ private:
return expr + ')';
}
- std::string GenerateTextureArgument(TextureArgument argument) {
- const auto [type, operand] = argument;
+ std::string GenerateTextureArgument(const TextureArgument& argument) {
+ const auto& [type, operand] = argument;
if (operand == nullptr) {
return {};
}
@@ -1222,7 +1214,7 @@ private:
std::string BuildImageValues(Operation operation) {
constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
const std::size_t values_count{meta.values.size()};
std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
@@ -1387,6 +1379,26 @@ private:
return GenerateUnary(operation, "float", Type::Float, type);
}
+ Expression FSwizzleAdd(Operation operation) { // Emits GLSL for a swizzled float add: (op_a * modifier_a) + (op_b * modifier_b), with per-invocation modifiers.
+ const std::string op_a = VisitOperand(operation, 0).AsFloat();
+ const std::string op_b = VisitOperand(operation, 1).AsFloat();
+
+ if (!device.HasShaderBallot()) { // Fallback when the device reports no shader ballot support: log and emit a plain add.
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {fmt::format("{} + {}", op_a, op_b), Type::Float};
+ }
+
+ const std::string instr_mask = VisitOperand(operation, 2).AsUint(); // Third operand, visited only on the ballot path.
+ const std::string mask = code.GenerateTemporary();
+ code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, // Selects a 2-bit field of instr_mask: the low two bits of the invocation index, doubled, form the shift.
+ instr_mask);
+
+ const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); // Indexes the constant tables emitted by GetCommonDeclarations.
+ const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
+ return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
+ Type::Float};
+ }
+
Expression ICastFloat(Operation operation) {
return GenerateUnary(operation, "int", Type::Int, Type::Float);
}
@@ -1494,6 +1506,8 @@ private:
case Tegra::Shader::HalfType::H1_H1:
return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
}
+ UNREACHABLE();
+ return {"0", Type::Int};
}
Expression HMergeF32(Operation operation) {
@@ -1676,7 +1690,7 @@ private:
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
return {GenerateTexture(operation, "Gather",
- {TextureArgument{type, meta->component}, TextureAoffi{}}) +
+ {TextureAoffi{}, TextureArgument{type, meta->component}}) +
GetSwizzle(meta->element),
Type::Float};
}
@@ -1765,14 +1779,14 @@ private:
return {"0", Type::Int};
}
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
Type::Uint};
}
Expression ImageStore(Operation operation) {
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
BuildIntegerCoordinates(operation), BuildImageValues(operation));
return {};
@@ -1780,7 +1794,7 @@ private:
template <const std::string_view& opname>
Expression AtomicImage(Operation operation) {
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
ASSERT(meta.values.size() == 1);
return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
@@ -1822,10 +1836,9 @@ private:
return {};
}
- Expression Exit(Operation operation) {
+ void PreExit() {
if (stage != ProgramType::Fragment) {
- code.AddLine("return;");
- return {};
+ return;
}
const auto& used_registers = ir.GetRegisters();
const auto SafeGetRegister = [&](u32 reg) -> Expression {
@@ -1857,7 +1870,10 @@ private:
// already contains one past the last color register.
code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
}
+ }
+ Expression Exit(Operation operation) {
+ PreExit();
code.AddLine("return;");
return {};
}
@@ -1876,10 +1892,6 @@ private:
Expression EmitVertex(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");
-
- // If a geometry shader is attached, it will always flip (it's the last stage before
- // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
- code.AddLine("gl_Position.xy *= viewport_flip.xy;");
code.AddLine("EmitVertex();");
return {};
}
@@ -1887,14 +1899,12 @@ private:
Expression EndPrimitive(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");
-
code.AddLine("EndPrimitive();");
return {};
}
Expression YNegate(Operation operation) {
- // Config pack's third value is Y_NEGATE's state.
- return {"config_pack[2]", Type::Uint};
+ return {"y_direction", Type::Float};
}
template <u32 element>
@@ -1946,34 +1956,24 @@ private:
return Vote(operation, "allThreadsEqualNV");
}
- template <const std::string_view& func>
- Expression Shuffle(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsFloat();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
- // On a "single-thread" device we are either on the same thread or out of bounds. Both
- // cases return the passed value.
- return {value, Type::Float};
+ Expression ThreadId(Operation operation) {
+ if (!device.HasShaderBallot()) {
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {"0U", Type::Uint};
}
-
- const std::string index = VisitOperand(operation, 1).AsUint();
- const std::string width = VisitOperand(operation, 2).AsUint();
- return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+ return {"gl_SubGroupInvocationARB", Type::Uint};
}
- template <const std::string_view& func>
- Expression InRangeShuffle(Operation operation) {
- const std::string index = VisitOperand(operation, 0).AsUint();
- const std::string width = VisitOperand(operation, 1).AsUint();
- if (!device.HasWarpIntrinsics()) {
- // On a "single-thread" device we are only in bounds when the requested index is 0.
- return {fmt::format("({} == 0U)", index), Type::Bool};
+ Expression ShuffleIndexed(Operation operation) {
+ std::string value = VisitOperand(operation, 0).AsFloat();
+
+ if (!device.HasShaderBallot()) {
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {std::move(value), Type::Float};
}
- const std::string in_range = code.GenerateTemporary();
- code.AddLine("bool {};", in_range);
- code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
- return {in_range, Type::Bool};
+ const std::string index = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
}
struct Func final {
@@ -1985,11 +1985,6 @@ private:
static constexpr std::string_view Or = "Or";
static constexpr std::string_view Xor = "Xor";
static constexpr std::string_view Exchange = "Exchange";
-
- static constexpr std::string_view ShuffleIndexed = "shuffleNV";
- static constexpr std::string_view ShuffleUp = "shuffleUpNV";
- static constexpr std::string_view ShuffleDown = "shuffleDownNV";
- static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
};
static constexpr std::array operation_decompilers = {
@@ -2020,6 +2015,7 @@ private:
&GLSLDecompiler::FTrunc,
&GLSLDecompiler::FCastInteger<Type::Int>,
&GLSLDecompiler::FCastInteger<Type::Uint>,
+ &GLSLDecompiler::FSwizzleAdd,
&GLSLDecompiler::Add<Type::Int>,
&GLSLDecompiler::Mul<Type::Int>,
@@ -2155,15 +2151,8 @@ private:
&GLSLDecompiler::VoteAny,
&GLSLDecompiler::VoteEqual,
- &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
- &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
- &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
- &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
-
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
+ &GLSLDecompiler::ThreadId,
+ &GLSLDecompiler::ShuffleIndexed,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2229,7 +2218,7 @@ private:
code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
}
- std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
+ std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
return fmt::format("{}_{}_{}", name, index, suffix);
}
@@ -2254,27 +2243,259 @@ private:
ShaderWriter code;
};
+/// Builds the GLSL identifier of the AST flow variable with index i ("flow_var_<i>").
+std::string GetFlowVariable(u32 i) {
+    return "flow_var_" + std::to_string(i);
+}
+
+/// Visitor that lowers a control-flow condition expression into GLSL source text.
+/// Apply it with std::visit on an expression; each overload appends the text for one node kind
+/// to an internal string, which GetResult() exposes once the visit has finished.
+class ExprDecompiler {
+public:
+    explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
+
+    /// Logical AND of two sub-expressions, parenthesized.
+    void operator()(const ExprAnd& expr) {
+        inner += "( ";
+        std::visit(*this, *expr.operand1);
+        inner += " && ";
+        std::visit(*this, *expr.operand2);
+        inner += ')';
+    }
+
+    /// Logical OR of two sub-expressions, parenthesized.
+    void operator()(const ExprOr& expr) {
+        inner += "( ";
+        std::visit(*this, *expr.operand1);
+        inner += " || ";
+        std::visit(*this, *expr.operand2);
+        inner += ')';
+    }
+
+    /// Logical negation of a sub-expression.
+    void operator()(const ExprNot& expr) {
+        inner += '!';
+        std::visit(*this, *expr.operand1);
+    }
+
+    /// Reference to a shader predicate.
+    void operator()(const ExprPredicate& expr) {
+        const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
+        inner += decomp.GetPredicate(pred);
+    }
+
+    /// Condition code, resolved through the IR to either a predicate or an internal flag.
+    void operator()(const ExprCondCode& expr) {
+        const Node cc = decomp.ir.GetConditionCode(expr.cc);
+        std::string target;
+
+        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
+            const auto index = pred->GetIndex();
+            switch (index) {
+            case Tegra::Shader::Pred::NeverExecute:
+                target = "false";
+                break;
+            case Tegra::Shader::Pred::UnusedIndex:
+                target = "true";
+                break;
+            default:
+                target = decomp.GetPredicate(index);
+                break;
+            }
+        } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
+            target = decomp.GetInternalFlag(flag->GetFlag());
+        } else {
+            UNREACHABLE();
+        }
+        inner += target;
+    }
+
+    /// Reference to a flow variable declared by GLSLDecompiler::DecompileAST.
+    void operator()(const ExprVar& expr) {
+        inner += GetFlowVariable(expr.var_index);
+    }
+
+    /// Boolean literal.
+    void operator()(const ExprBoolean& expr) {
+        inner += expr.value ? "true" : "false";
+    }
+
+    /// Compares the unsigned bit pattern of a register against a constant.
+    /// Takes the node by const reference like every other overload, so the visitor also
+    /// accepts const expression variants.
+    void operator()(const VideoCommon::Shader::ExprGprEqual& expr) {
+        inner +=
+            "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
+    }
+
+    /// Text accumulated so far.
+    const std::string& GetResult() const {
+        return inner;
+    }
+
+private:
+    std::string inner;
+    GLSLDecompiler& decomp;
+};
+
+/// Visitor that walks a decompiled control-flow AST and writes the matching GLSL statements
+/// through the code writer of the owning GLSLDecompiler.
+class ASTDecompiler {
+public:
+    explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
+
+    /// Program root: emits every top-level node in order.
+    void operator()(const ASTProgram& ast) {
+        VisitSiblings(ast.nodes.GetFirst());
+    }
+
+    /// Emits "if (<condition>) { ... }" around the child nodes.
+    void operator()(const ASTIfThen& ast) {
+        ExprDecompiler condition{decomp};
+        std::visit(condition, *ast.condition);
+        decomp.code.AddLine("if ({}) {{", condition.GetResult());
+        ++decomp.code.scope;
+        VisitSiblings(ast.nodes.GetFirst());
+        --decomp.code.scope;
+        decomp.code.AddLine("}}");
+    }
+
+    /// Emits "else { ... }" around the child nodes.
+    void operator()(const ASTIfElse& ast) {
+        decomp.code.AddLine("else {{");
+        ++decomp.code.scope;
+        VisitSiblings(ast.nodes.GetFirst());
+        --decomp.code.scope;
+        decomp.code.AddLine("}}");
+    }
+
+    /// Encoded blocks are not expected here.
+    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
+        UNREACHABLE();
+    }
+
+    /// Decoded block: forwards its nodes to the GLSL decompiler.
+    void operator()(const ASTBlockDecoded& ast) {
+        decomp.VisitBlock(ast.nodes);
+    }
+
+    /// Assigns the evaluated condition to a flow variable.
+    void operator()(const ASTVarSet& ast) {
+        ExprDecompiler value{decomp};
+        std::visit(value, *ast.condition);
+        decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), value.GetResult());
+    }
+
+    /// Labels only leave a comment in the generated source.
+    void operator()(const ASTLabel& ast) {
+        decomp.code.AddLine("// Label_{}:", ast.index);
+    }
+
+    /// Gotos are not expected here.
+    void operator()([[maybe_unused]] const ASTGoto& ast) {
+        UNREACHABLE();
+    }
+
+    /// Emits "do { ... } while(<condition>);". The condition text is built before the body.
+    void operator()(const ASTDoWhile& ast) {
+        ExprDecompiler condition{decomp};
+        std::visit(condition, *ast.condition);
+        decomp.code.AddLine("do {{");
+        ++decomp.code.scope;
+        VisitSiblings(ast.nodes.GetFirst());
+        --decomp.code.scope;
+        decomp.code.AddLine("}} while({});", condition.GetResult());
+    }
+
+    /// Emits a discard or a return, wrapped in an if when the condition is not trivially true.
+    void operator()(const ASTReturn& ast) {
+        const bool unconditional = VideoCommon::Shader::ExprIsTrue(ast.condition);
+        if (!unconditional) {
+            ExprDecompiler condition{decomp};
+            std::visit(condition, *ast.condition);
+            decomp.code.AddLine("if ({}) {{", condition.GetResult());
+            ++decomp.code.scope;
+        }
+        if (!ast.kills) {
+            // PreExit runs before the return statement is written.
+            decomp.PreExit();
+            decomp.code.AddLine("return;");
+        } else {
+            decomp.code.AddLine("discard;");
+        }
+        if (!unconditional) {
+            --decomp.code.scope;
+            decomp.code.AddLine("}}");
+        }
+    }
+
+    /// Emits a break, wrapped in an if when the condition is not trivially true.
+    void operator()(const ASTBreak& ast) {
+        if (VideoCommon::Shader::ExprIsTrue(ast.condition)) {
+            decomp.code.AddLine("break;");
+            return;
+        }
+        ExprDecompiler condition{decomp};
+        std::visit(condition, *ast.condition);
+        decomp.code.AddLine("if ({}) {{", condition.GetResult());
+        ++decomp.code.scope;
+        decomp.code.AddLine("break;");
+        --decomp.code.scope;
+        decomp.code.AddLine("}}");
+    }
+
+    /// Dispatches on the concrete kind of a single node.
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+    }
+
+private:
+    /// Visits current and then every node reachable through GetNext(), in order.
+    void VisitSiblings(ASTNode current) {
+        for (; current; current = current->GetNext()) {
+            Visit(current);
+        }
+    }
+
+    GLSLDecompiler& decomp;
+};
+
+/// Emits the shader body from the structured AST held by the IR.
+void GLSLDecompiler::DecompileAST() {
+    // Every flow variable is declared up front and starts out false.
+    for (u32 index = 0, count = ir.GetASTNumVariables(); index < count; ++index) {
+        code.AddLine("bool {} = false;", GetFlowVariable(index));
+    }
+
+    ASTDecompiler{*this}.Visit(ir.GetASTProgram());
+}
+
} // Anonymous namespace
+/// Collects the resources referenced by a shader IR into a ShaderEntries description:
+/// constant buffers, global memory regions, samplers, images, clip distances and length.
+ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
+    ShaderEntries result;
+
+    for (const auto& [index, cbuf] : ir.GetConstantBuffers()) {
+        result.const_buffers.emplace_back(cbuf.GetMaxOffset(), cbuf.IsIndirect(), index);
+    }
+    for (const auto& [base, usage] : ir.GetGlobalMemory()) {
+        result.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
+                                                  usage.is_read, usage.is_written);
+    }
+    for (const auto& sampler : ir.GetSamplers()) {
+        result.samplers.emplace_back(sampler);
+    }
+    for (const auto& image : ir.GetImages()) {
+        result.images.emplace_back(image);
+    }
+
+    result.clip_distances = ir.GetClipDistances();
+    result.shader_length = ir.GetLength();
+    return result;
+}
+
std::string GetCommonDeclarations() {
- return fmt::format(
- "#define ftoi floatBitsToInt\n"
- "#define ftou floatBitsToUint\n"
- "#define itof intBitsToFloat\n"
- "#define utof uintBitsToFloat\n\n"
- "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
- " bvec2 is_nan1 = isnan(pair1);\n"
- " bvec2 is_nan2 = isnan(pair2);\n"
- " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
- "is_nan2.y);\n"
- "}}\n\n");
+ return R"(#define ftoi floatBitsToInt
+#define ftou floatBitsToUint
+#define itof intBitsToFloat
+#define utof uintBitsToFloat
+
+bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
+ bvec2 is_nan1 = isnan(pair1);
+ bvec2 is_nan2 = isnan(pair2);
+ return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
+}
+
+const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
+const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
+)";
}
-ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
- const std::string& suffix) {
+std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
+ const std::string& suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
decompiler.Decompile();
- return {decompiler.GetResult(), decompiler.GetShaderEntries()};
+ return decompiler.GetResult();
}
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e538dc001..b1e75e6cc 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -34,10 +34,7 @@ enum class ProgramType : u32 {
namespace OpenGL::GLShader {
-struct ShaderEntries;
-
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using ProgramResult = std::pair<std::string, ShaderEntries>;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
@@ -85,17 +82,18 @@ private:
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
+ std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers;
- std::vector<SamplerEntry> bindless_samplers;
std::vector<ImageEntry> images;
- std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
};
+ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
+
std::string GetCommonDeclarations();
-ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- ProgramType stage, const std::string& suffix);
+std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ ProgramType stage, const std::string& suffix);
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 6a7012b54..184a565e6 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -22,6 +22,29 @@
namespace OpenGL {
+using VideoCommon::Shader::BindlessSamplerMap;
+using VideoCommon::Shader::BoundSamplerMap;
+using VideoCommon::Shader::KeyMap;
+
+namespace {
+
+struct ConstBufferKey { // Flat record read by LoadTransferable, which stores it as usage.keys[{cbuf, offset}] = value.
+ u32 cbuf; // First half of the map key.
+ u32 offset; // Second half of the map key.
+ u32 value; // Mapped value.
+};
+
+struct BoundSamplerKey { // Flat record read by LoadTransferable, which stores it as usage.bound_samplers[offset] = sampler.
+ u32 offset; // Map key.
+ Tegra::Engines::SamplerDescriptor sampler; // Mapped sampler descriptor.
+};
+
+struct BindlessSamplerKey { // Flat record read by LoadTransferable, which stores it as usage.bindless_samplers[{cbuf, offset}] = sampler.
+ u32 cbuf; // First half of the map key.
+ u32 offset; // Second half of the map key.
+ Tegra::Engines::SamplerDescriptor sampler; // Mapped sampler descriptor.
+};
+
using ShaderCacheVersionHash = std::array<u8, 64>;
enum class TransferableEntryKind : u32 {
@@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 {
Usage,
};
-enum class PrecompiledEntryKind : u32 {
- Decompiled,
- Dump,
-};
-
-constexpr u32 NativeVersion = 4;
+constexpr u32 NativeVersion = 5;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 16);
-static_assert(sizeof(ShaderDiskCacheUsage) == 40);
-
-namespace {
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
return hash;
}
-} // namespace
+} // Anonymous namespace
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
- u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b)
: unique_identifier{unique_identifier}, program_type{program_type},
- program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
@@ -90,15 +103,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
if (file.WriteObject(unique_identifier) != 1 ||
file.WriteObject(static_cast<u32>(program_type)) != 1 ||
- file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) {
+ file.WriteObject(static_cast<u32>(program_code.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) {
return false;
}
- if (file.WriteArray(program_code.data(), program_code_size) != program_code_size)
+ if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size())
return false;
if (HasProgramA() &&
- file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
+ file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) {
return false;
}
return true;
@@ -112,44 +126,47 @@ std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskC
ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
- if (!Settings::values.use_disk_shader_cache || !has_title_id)
+ if (!Settings::values.use_disk_shader_cache || !has_title_id) {
return {};
- tried_to_load = true;
+ }
FileUtil::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
GetTitleID());
+ is_usable = true;
return {};
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL,
- "Failed to get transferable cache version for title id={} - skipping",
+ "Failed to get transferable cache version for title id={}, skipping",
GetTitleID());
return {};
}
if (version < NativeVersion) {
- LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
+ LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
file.Close();
InvalidateTransferable();
+ is_usable = true;
return {};
}
if (version > NativeVersion) {
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
- "of the emulator - skipping");
+ "of the emulator, skipping");
return {};
}
// Version is valid, load the shaders
+ constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping";
std::vector<ShaderDiskCacheRaw> raws;
std::vector<ShaderDiskCacheUsage> usages;
while (file.Tell() < file.GetSize()) {
TransferableEntryKind kind{};
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
- LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
+ LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
return {};
}
@@ -157,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
case TransferableEntryKind::Raw: {
ShaderDiskCacheRaw entry;
if (!entry.Load(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
+ LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
transferable.insert({entry.GetUniqueIdentifier(), {}});
@@ -165,30 +182,62 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
break;
}
case TransferableEntryKind::Usage: {
- ShaderDiskCacheUsage usage{};
- if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
- LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
+ ShaderDiskCacheUsage usage;
+
+ u32 num_keys{};
+ u32 num_bound_samplers{};
+ u32 num_bindless_samplers{};
+ if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
+ file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
+ file.ReadArray(&num_bound_samplers, 1) != 1 ||
+ file.ReadArray(&num_bindless_samplers, 1) != 1) {
+ LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
+
+ std::vector<ConstBufferKey> keys(num_keys);
+ std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
+ std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
+ if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
+ file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
+ bound_samplers.size() ||
+ file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
+ bindless_samplers.size()) {
+ LOG_ERROR(Render_OpenGL, error_loading);
+ return {};
+ }
+ for (const auto& key : keys) {
+ usage.keys.insert({{key.cbuf, key.offset}, key.value});
+ }
+ for (const auto& key : bound_samplers) {
+ usage.bound_samplers.emplace(key.offset, key.sampler);
+ }
+ for (const auto& key : bindless_samplers) {
+ usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
+ }
+
usages.push_back(std::move(usage));
break;
}
default:
- LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
+ LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
static_cast<u32>(kind));
return {};
}
}
- return {{raws, usages}};
+ is_usable = true;
+ return {{std::move(raws), std::move(usages)}};
}
-std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
+std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
ShaderDiskCacheOpenGL::LoadPrecompiled() {
- if (!IsUsable())
+ if (!is_usable) {
return {};
+ }
- FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+ std::string path = GetPrecompiledPath();
+ FileUtil::IOFile file(path, "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
GetTitleID());
@@ -198,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
const auto result = LoadPrecompiledFile(file);
if (!result) {
LOG_INFO(Render_OpenGL,
- "Failed to load precompiled cache for game with title id={} - removing",
+ "Failed to load precompiled cache for game with title id={}, removing",
GetTitleID());
file.Close();
InvalidatePrecompiled();
@@ -207,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
return *result;
}
-std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
+std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
@@ -227,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
ShaderDumpsMap dumps;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
- PrecompiledEntryKind kind{};
- if (!LoadObjectFromPrecompiled(kind)) {
+ u32 num_keys{};
+ u32 num_bound_samplers{};
+ u32 num_bindless_samplers{};
+ ShaderDiskCacheUsage usage;
+ if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
+ !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
+ !LoadObjectFromPrecompiled(num_bound_samplers) ||
+ !LoadObjectFromPrecompiled(num_bindless_samplers)) {
return {};
}
-
- switch (kind) {
- case PrecompiledEntryKind::Decompiled: {
- u64 unique_identifier{};
- if (!LoadObjectFromPrecompiled(unique_identifier)) {
- return {};
- }
-
- auto entry = LoadDecompiledEntry();
- if (!entry) {
- return {};
- }
- decompiled.insert({unique_identifier, std::move(*entry)});
- break;
- }
- case PrecompiledEntryKind::Dump: {
- ShaderDiskCacheUsage usage;
- if (!LoadObjectFromPrecompiled(usage)) {
- return {};
- }
-
- ShaderDiskCacheDump dump;
- if (!LoadObjectFromPrecompiled(dump.binary_format)) {
- return {};
- }
-
- u32 binary_length{};
- if (!LoadObjectFromPrecompiled(binary_length)) {
- return {};
- }
-
- dump.binary.resize(binary_length);
- if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
- return {};
- }
-
- dumps.insert({usage, dump});
- break;
- }
- default:
+ std::vector<ConstBufferKey> keys(num_keys);
+ std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
+ std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
+ // Each load result is tested for truthiness like the other call sites in this function;
+ // comparing the negated result against the element count rejected valid sampler entries.
+ if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
+ !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) ||
+ !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size())) {
return {};
}
- }
- return {{decompiled, dumps}};
-}
-
-std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
- u32 code_size{};
- if (!LoadObjectFromPrecompiled(code_size)) {
- return {};
- }
-
- std::string code(code_size, '\0');
- if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
- return {};
- }
-
- ShaderDiskCacheDecompiled entry;
- entry.code = std::move(code);
-
- u32 const_buffers_count{};
- if (!LoadObjectFromPrecompiled(const_buffers_count)) {
- return {};
- }
-
- for (u32 i = 0; i < const_buffers_count; ++i) {
- u32 max_offset{};
- u32 index{};
- bool is_indirect{};
- if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
- !LoadObjectFromPrecompiled(is_indirect)) {
- return {};
+ for (const auto& key : keys) {
+ usage.keys.insert({{key.cbuf, key.offset}, key.value});
}
- entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
- }
-
- u32 samplers_count{};
- if (!LoadObjectFromPrecompiled(samplers_count)) {
- return {};
- }
-
- for (u32 i = 0; i < samplers_count; ++i) {
- u64 offset{};
- u64 index{};
- u32 type{};
- bool is_array{};
- bool is_shadow{};
- bool is_bindless{};
- if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
- !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
- !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
- return {};
+ for (const auto& key : bound_samplers) {
+ usage.bound_samplers.emplace(key.offset, key.sampler);
}
- entry.entries.samplers.emplace_back(
- static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
- }
-
- u32 images_count{};
- if (!LoadObjectFromPrecompiled(images_count)) {
- return {};
- }
- for (u32 i = 0; i < images_count; ++i) {
- u64 offset{};
- u64 index{};
- u32 type{};
- u8 is_bindless{};
- u8 is_written{};
- u8 is_read{};
- u8 is_atomic{};
- if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
- !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
- !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
- !LoadObjectFromPrecompiled(is_atomic)) {
- return {};
+ for (const auto& key : bindless_samplers) {
+ usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
- entry.entries.images.emplace_back(
- static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
- is_read != 0, is_atomic != 0);
- }
- u32 global_memory_count{};
- if (!LoadObjectFromPrecompiled(global_memory_count)) {
- return {};
- }
- for (u32 i = 0; i < global_memory_count; ++i) {
- u32 cbuf_index{};
- u32 cbuf_offset{};
- bool is_read{};
- bool is_written{};
- if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
- !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
+ ShaderDiskCacheDump dump;
+ if (!LoadObjectFromPrecompiled(dump.binary_format)) {
return {};
}
- entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
- is_written);
- }
- for (auto& clip_distance : entry.entries.clip_distances) {
- if (!LoadObjectFromPrecompiled(clip_distance)) {
+ u32 binary_length{};
+ if (!LoadObjectFromPrecompiled(binary_length)) {
return {};
}
- }
- u64 shader_length{};
- if (!LoadObjectFromPrecompiled(shader_length)) {
- return {};
- }
- entry.entries.shader_length = static_cast<std::size_t>(shader_length);
-
- return entry;
-}
-
-bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
- const GLShader::ShaderEntries& entries) {
- if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
- !SaveObjectToPrecompiled(unique_identifier) ||
- !SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
- !SaveArrayToPrecompiled(code.data(), code.size())) {
- return false;
- }
-
- if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
- return false;
- }
- for (const auto& cbuf : entries.const_buffers) {
- if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
- !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
- !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
- return false;
- }
- }
-
- if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
- return false;
- }
- for (const auto& sampler : entries.samplers) {
- if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
- !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
- !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
- !SaveObjectToPrecompiled(sampler.IsArray()) ||
- !SaveObjectToPrecompiled(sampler.IsShadow()) ||
- !SaveObjectToPrecompiled(sampler.IsBindless())) {
- return false;
- }
- }
-
- if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
- return false;
- }
- for (const auto& image : entries.images) {
- if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
- !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
- !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) {
- return false;
- }
- }
-
- if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
- return false;
- }
- for (const auto& gmem : entries.global_memory_entries) {
- if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
- !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
- !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
- return false;
- }
- }
-
- for (const bool clip_distance : entries.clip_distances) {
- if (!SaveObjectToPrecompiled(clip_distance)) {
- return false;
+ dump.binary.resize(binary_length);
+ if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
+ return {};
}
- }
- if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
- return false;
+ dumps.emplace(std::move(usage), dump);
}
-
- return true;
+ return dumps;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -479,8 +346,9 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
}
void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
- if (!IsUsable())
+ if (!is_usable) {
return;
+ }
const u64 id = entry.GetUniqueIdentifier();
if (transferable.find(id) != transferable.end()) {
@@ -489,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
}
FileUtil::IOFile file = AppendTransferableFile();
- if (!file.IsOpen())
+ if (!file.IsOpen()) {
return;
+ }
if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
+ LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
file.Close();
InvalidateTransferable();
return;
@@ -501,8 +370,9 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
}
void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
- if (!IsUsable())
+ if (!is_usable) {
return;
+ }
const auto it = transferable.find(usage.unique_identifier);
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
@@ -517,35 +387,54 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
FileUtil::IOFile file = AppendTransferableFile();
if (!file.IsOpen())
return;
-
- if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) {
- LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
+ const auto Close = [&] {
+ LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
file.Close();
InvalidateTransferable();
+ };
+
+ if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
+ file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
+ file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
+ Close();
return;
}
+ for (const auto& [pair, value] : usage.keys) {
+ const auto [cbuf, offset] = pair;
+ if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
+ Close();
+ return;
+ }
+ }
+ for (const auto& [offset, sampler] : usage.bound_samplers) {
+ if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
+ Close();
+ return;
+ }
+ }
+ for (const auto& [pair, sampler] : usage.bindless_samplers) {
+ const auto [cbuf, offset] = pair;
+ if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
+ Close();
+ return;
+ }
+ }
}
-void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
- const GLShader::ShaderEntries& entries) {
- if (!IsUsable())
+void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
+ if (!is_usable) {
return;
+ }
+ // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
+ // when writing the dump. This should be done the moment I get access to write to the virtual
+ // file.
if (precompiled_cache_virtual_file.GetSize() == 0) {
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
- if (!SaveDecompiledFile(unique_identifier, code, entries)) {
- LOG_ERROR(Render_OpenGL,
- "Failed to save decompiled entry to the precompiled file - removing");
- InvalidatePrecompiled();
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
- if (!IsUsable())
- return;
-
GLint binary_length{};
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
@@ -553,25 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
- if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
- !SaveObjectToPrecompiled(usage) ||
- !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
- !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
- !SaveArrayToPrecompiled(binary.data(), binary.size())) {
- LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
+ const auto Close = [&] {
+ LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
usage.unique_identifier);
InvalidatePrecompiled();
+ };
+
+ if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
+ !SaveObjectToPrecompiled(usage.variant) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
+ Close();
return;
}
-}
-
-bool ShaderDiskCacheOpenGL::IsUsable() const {
- return tried_to_load && Settings::values.use_disk_shader_cache;
+ for (const auto& [pair, value] : usage.keys) {
+ const auto [cbuf, offset] = pair;
+ if (!SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value})) {
+ Close();
+ return;
+ }
+ }
+ for (const auto& [offset, sampler] : usage.bound_samplers) {
+ if (!SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler})) {
+ Close();
+ return;
+ }
+ }
+ for (const auto& [pair, sampler] : usage.bindless_samplers) {
+ const auto [cbuf, offset] = pair;
+ if (!SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler})) {
+ Close();
+ return;
+ }
+ }
+ if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
+ !SaveArrayToPrecompiled(binary.data(), binary.size())) {
+ Close();
+ }
}
FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
- if (!EnsureDirectories())
+ if (!EnsureDirectories()) {
return {};
+ }
const auto transferable_path{GetTransferablePath()};
const bool existed = FileUtil::Exists(transferable_path);
@@ -603,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
precompiled_cache_virtual_file_offset = 0;
- const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
- const std::vector<u8>& compressed =
+ const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
+ const std::vector<u8> compressed =
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
const auto precompiled_path{GetPrecompiledPath()};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index cc8bbd61e..db23ada93 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -8,6 +8,7 @@
#include <optional>
#include <string>
#include <tuple>
+#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
@@ -19,6 +20,7 @@
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/shader/const_buffer_locker.h"
namespace Core {
class System;
@@ -53,6 +55,7 @@ struct BaseBindings {
return !operator==(rhs);
}
};
+static_assert(std::is_trivially_copyable_v<BaseBindings>);
/// Describes the different variants a single program can be compiled.
struct ProgramVariant {
@@ -70,13 +73,20 @@ struct ProgramVariant {
}
};
+static_assert(std::is_trivially_copyable_v<ProgramVariant>);
+
/// Describes how a shader is used.
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
ProgramVariant variant;
+ VideoCommon::Shader::KeyMap keys;
+ VideoCommon::Shader::BoundSamplerMap bound_samplers;
+ VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
bool operator==(const ShaderDiskCacheUsage& rhs) const {
- return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
+ return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
+ std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
+ rhs.bindless_samplers);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -123,8 +133,7 @@ namespace OpenGL {
class ShaderDiskCacheRaw {
public:
explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
- u32 program_code_size, u32 program_code_size_b,
- ProgramCode program_code, ProgramCode program_code_b);
+ ProgramCode program_code, ProgramCode program_code_b = {});
ShaderDiskCacheRaw();
~ShaderDiskCacheRaw();
@@ -155,22 +164,14 @@ public:
private:
u64 unique_identifier{};
ProgramType program_type{};
- u32 program_code_size{};
- u32 program_code_size_b{};
ProgramCode program_code;
ProgramCode program_code_b;
};
-/// Contains decompiled data from a shader
-struct ShaderDiskCacheDecompiled {
- std::string code;
- GLShader::ShaderEntries entries;
-};
-
/// Contains an OpenGL dumped binary program
struct ShaderDiskCacheDump {
- GLenum binary_format;
+ GLenum binary_format{};
std::vector<u8> binary;
};
@@ -184,9 +185,7 @@ public:
LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
- std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
- std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
- LoadPrecompiled();
+ std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable();
@@ -200,10 +199,6 @@ public:
/// Saves shader usage to the transferable file. Does not check for collisions.
void SaveUsage(const ShaderDiskCacheUsage& usage);
- /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
- void SaveDecompiled(u64 unique_identifier, const std::string& code,
- const GLShader::ShaderEntries& entries);
-
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
@@ -212,21 +207,9 @@ public:
private:
/// Loads the precompiled cache from the passed file. Returns empty on failure.
- std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
- std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+ std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
LoadPrecompiledFile(FileUtil::IOFile& file);
- /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
- /// failure.
- std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
-
- /// Saves a decompiled entry to the passed file. Returns true on success.
- bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
- const GLShader::ShaderEntries& entries);
-
- /// Returns if the cache can be used
- bool IsUsable() const;
-
/// Opens the current game's transferable file and writes its header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;
@@ -297,7 +280,7 @@ private:
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// The cache has been loaded at boot
- bool tried_to_load{};
+ bool is_usable{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 3a8d9e1da..af17216bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -11,93 +11,56 @@
namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D;
+using VideoCommon::Shader::CompileDepth;
+using VideoCommon::Shader::CompilerSettings;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
-static constexpr u32 PROGRAM_OFFSET = 10;
-static constexpr u32 COMPUTE_OFFSET = 0;
-
-ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
- const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
- std::string out = "// Shader Unique Id: VS" + id + "\n\n";
- out += GetCommonDeclarations();
-
+std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
+ std::string out = GetCommonDeclarations();
out += R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+ float y_direction;
};
)";
-
- const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
- const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
- ProgramResult program = Decompile(device, program_ir, stage, "vertex");
- out += program.first;
-
- if (setup.IsDualProgram()) {
- const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
- ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
- out += program_b.first;
+ const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB;
+ out += Decompile(device, ir, stage, "vertex");
+ if (ir_b) {
+ out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b");
}
out += R"(
void main() {
execute_vertex();
)";
-
- if (setup.IsDualProgram()) {
+ if (ir_b) {
out += " execute_vertex_b();";
}
-
- out += R"(
-
- // Set Position Y direction
- gl_Position.y *= utof(config_pack[2]);
- // Check if the flip stage is VertexB
- // Config pack's second value is flip_stage
- if (config_pack[1] == 1) {
- // Viewport can be flipped, which is unsupported by glViewport
- gl_Position.xy *= viewport_flip.xy;
- }
-})";
-
- return {std::move(out), std::move(program.second)};
+ out += "}\n";
+ return out;
}
-ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
- const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
- std::string out = "// Shader Unique Id: GS" + id + "\n\n";
- out += GetCommonDeclarations();
-
+std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
+ std::string out = GetCommonDeclarations();
out += R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+ float y_direction;
};
)";
-
- const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
- ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
- out += program.first;
+ out += Decompile(device, ir, ProgramType::Geometry, "geometry");
out += R"(
void main() {
execute_geometry();
-};)";
-
- return {std::move(out), std::move(program.second)};
+}
+)";
+ return out;
}
-ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
- const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
- std::string out = "// Shader Unique Id: FS" + id + "\n\n";
- out += GetCommonDeclarations();
-
+std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
+ std::string out = GetCommonDeclarations();
out += R"(
layout (location = 0) out vec4 FragColor0;
layout (location = 1) out vec4 FragColor1;
@@ -109,40 +72,29 @@ layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+ float y_direction;
};
)";
- const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
- ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
- out += program.first;
+ out += Decompile(device, ir, ProgramType::Fragment, "fragment");
out += R"(
void main() {
execute_fragment();
}
-
)";
- return {std::move(out), std::move(program.second)};
+ return out;
}
-ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
- const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
- std::string out = "// Shader Unique Id: CS" + id + "\n\n";
- out += GetCommonDeclarations();
-
- const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
- ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
- out += program.first;
-
+std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
+ std::string out = GetCommonDeclarations();
+ out += Decompile(device, ir, ProgramType::Compute, "compute");
out += R"(
void main() {
execute_compute();
}
)";
- return {std::move(out), std::move(program.second)};
+ return out;
}
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3833e88ab..cba2be9f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -17,44 +17,18 @@ class Device;
namespace OpenGL::GLShader {
using VideoCommon::Shader::ProgramCode;
-
-struct ShaderSetup {
- explicit ShaderSetup(ProgramCode program_code) {
- program.code = std::move(program_code);
- }
-
- struct {
- ProgramCode code;
- ProgramCode code_b; // Used for dual vertex shaders
- u64 unique_identifier;
- std::size_t size_a;
- std::size_t size_b;
- } program;
-
- /// Used in scenarios where we have a dual vertex shaders
- void SetProgramB(ProgramCode program_b) {
- program.code_b = std::move(program_b);
- has_program_b = true;
- }
-
- bool IsDualProgram() const {
- return has_program_b;
- }
-
-private:
- bool has_program_b{};
-};
+using VideoCommon::Shader::ShaderIR;
/// Generates the GLSL vertex shader program source code for the given VS program
-ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup);
+std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
/// Generates the GLSL geometry shader program source code for the given GS program
-ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup);
+std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
/// Generates the GLSL fragment shader program source code for the given FS program
-ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
+std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
/// Generates the GLSL compute shader program source code for the given CS program
-ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
+std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index b05f90f20..75d3fac04 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -40,27 +40,11 @@ void ProgramManager::UpdatePipeline() {
old_state = current_state;
}
-void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
- const auto& state = maxwell.state;
-
- // TODO(bunnei): Support more than one viewport
- viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
- viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
-
- instance_id = state.current_instance;
-
- // Assign in which stage the position has to be flipped
- // (the last stage before the fragment shader).
- constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
- if (maxwell.regs.shader_config[geometry_index].enable) {
- flip_stage = geometry_index;
- } else {
- flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
- }
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
- y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
+ y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 6961e702a..3703e7018 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -18,17 +18,12 @@ namespace OpenGL::GLShader {
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
/// Not following that rule will cause problems on some AMD drivers.
-struct MaxwellUniformData {
- void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
-
- alignas(16) GLvec4 viewport_flip;
- struct alignas(16) {
- GLuint instance_id;
- GLuint flip_stage;
- GLfloat y_direction;
- };
+struct alignas(16) MaxwellUniformData {
+ void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
+
+ GLfloat y_direction;
};
-static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index bf86b5a0b..ccbe5912e 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <iterator>
#include <glad/glad.h>
#include "common/assert.h"
@@ -69,147 +70,29 @@ void Enable(GLenum cap, GLuint index, bool enable) {
}
void Enable(GLenum cap, bool& current_value, bool new_value) {
- if (UpdateValue(current_value, new_value))
+ if (UpdateValue(current_value, new_value)) {
Enable(cap, new_value);
+ }
}
void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
- if (UpdateValue(current_value, new_value))
+ if (UpdateValue(current_value, new_value)) {
Enable(cap, index, new_value);
-}
-
-} // namespace
-
-OpenGLState::OpenGLState() {
- // These all match default OpenGL values
- framebuffer_srgb.enabled = false;
-
- multisample_control.alpha_to_coverage = false;
- multisample_control.alpha_to_one = false;
-
- cull.enabled = false;
- cull.mode = GL_BACK;
- cull.front_face = GL_CCW;
-
- depth.test_enabled = false;
- depth.test_func = GL_LESS;
- depth.write_mask = GL_TRUE;
-
- primitive_restart.enabled = false;
- primitive_restart.index = 0;
-
- for (auto& item : color_mask) {
- item.red_enabled = GL_TRUE;
- item.green_enabled = GL_TRUE;
- item.blue_enabled = GL_TRUE;
- item.alpha_enabled = GL_TRUE;
- }
-
- const auto ResetStencil = [](auto& config) {
- config.test_func = GL_ALWAYS;
- config.test_ref = 0;
- config.test_mask = 0xFFFFFFFF;
- config.write_mask = 0xFFFFFFFF;
- config.action_depth_fail = GL_KEEP;
- config.action_depth_pass = GL_KEEP;
- config.action_stencil_fail = GL_KEEP;
- };
- stencil.test_enabled = false;
- ResetStencil(stencil.front);
- ResetStencil(stencil.back);
-
- for (auto& item : viewports) {
- item.x = 0;
- item.y = 0;
- item.width = 0;
- item.height = 0;
- item.depth_range_near = 0.0f;
- item.depth_range_far = 1.0f;
- item.scissor.enabled = false;
- item.scissor.x = 0;
- item.scissor.y = 0;
- item.scissor.width = 0;
- item.scissor.height = 0;
}
+}
- for (auto& item : blend) {
- item.enabled = true;
- item.rgb_equation = GL_FUNC_ADD;
- item.a_equation = GL_FUNC_ADD;
- item.src_rgb_func = GL_ONE;
- item.dst_rgb_func = GL_ZERO;
- item.src_a_func = GL_ONE;
- item.dst_a_func = GL_ZERO;
- }
-
- independant_blend.enabled = false;
-
- blend_color.red = 0.0f;
- blend_color.green = 0.0f;
- blend_color.blue = 0.0f;
- blend_color.alpha = 0.0f;
-
- logic_op.enabled = false;
- logic_op.operation = GL_COPY;
-
- draw.read_framebuffer = 0;
- draw.draw_framebuffer = 0;
- draw.vertex_array = 0;
- draw.shader_program = 0;
- draw.program_pipeline = 0;
-
- clip_distance = {};
-
- point.size = 1;
-
- fragment_color_clamp.enabled = false;
-
- depth_clamp.far_plane = false;
- depth_clamp.near_plane = false;
-
- polygon_offset.fill_enable = false;
- polygon_offset.line_enable = false;
- polygon_offset.point_enable = false;
- polygon_offset.factor = 0.0f;
- polygon_offset.units = 0.0f;
- polygon_offset.clamp = 0.0f;
+} // Anonymous namespace
- alpha_test.enabled = false;
- alpha_test.func = GL_ALWAYS;
- alpha_test.ref = 0.0f;
-}
+OpenGLState::OpenGLState() = default;
void OpenGLState::SetDefaultViewports() {
- for (auto& item : viewports) {
- item.x = 0;
- item.y = 0;
- item.width = 0;
- item.height = 0;
- item.depth_range_near = 0.0f;
- item.depth_range_far = 1.0f;
- item.scissor.enabled = false;
- item.scissor.x = 0;
- item.scissor.y = 0;
- item.scissor.width = 0;
- item.scissor.height = 0;
- }
+ viewports.fill(Viewport{});
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
}
-void OpenGLState::ApplyDefaultState() {
- glEnable(GL_BLEND);
- glDisable(GL_FRAMEBUFFER_SRGB);
- glDisable(GL_CULL_FACE);
- glDisable(GL_DEPTH_TEST);
- glDisable(GL_PRIMITIVE_RESTART);
- glDisable(GL_STENCIL_TEST);
- glDisable(GL_COLOR_LOGIC_OP);
- glDisable(GL_SCISSOR_TEST);
-}
-
-void OpenGLState::ApplyFramebufferState() const {
+void OpenGLState::ApplyFramebufferState() {
if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
}
@@ -218,52 +101,52 @@ void OpenGLState::ApplyFramebufferState() const {
}
}
-void OpenGLState::ApplyVertexArrayState() const {
+void OpenGLState::ApplyVertexArrayState() {
if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
glBindVertexArray(draw.vertex_array);
}
}
-void OpenGLState::ApplyShaderProgram() const {
+void OpenGLState::ApplyShaderProgram() {
if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
glUseProgram(draw.shader_program);
}
}
-void OpenGLState::ApplyProgramPipeline() const {
+void OpenGLState::ApplyProgramPipeline() {
if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
glBindProgramPipeline(draw.program_pipeline);
}
}
-void OpenGLState::ApplyClipDistances() const {
+void OpenGLState::ApplyClipDistances() {
for (std::size_t i = 0; i < clip_distance.size(); ++i) {
Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
clip_distance[i]);
}
}
-void OpenGLState::ApplyPointSize() const {
+void OpenGLState::ApplyPointSize() {
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}
}
-void OpenGLState::ApplyFragmentColorClamp() const {
+void OpenGLState::ApplyFragmentColorClamp() {
if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
}
}
-void OpenGLState::ApplyMultisample() const {
+void OpenGLState::ApplyMultisample() {
Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
multisample_control.alpha_to_coverage);
Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
multisample_control.alpha_to_one);
}
-void OpenGLState::ApplyDepthClamp() const {
+void OpenGLState::ApplyDepthClamp() {
if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
return;
@@ -276,7 +159,7 @@ void OpenGLState::ApplyDepthClamp() const {
Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
}
-void OpenGLState::ApplySRgb() const {
+void OpenGLState::ApplySRgb() {
if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
return;
cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
@@ -287,7 +170,7 @@ void OpenGLState::ApplySRgb() const {
}
}
-void OpenGLState::ApplyCulling() const {
+void OpenGLState::ApplyCulling() {
Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
if (UpdateValue(cur_state.cull.mode, cull.mode)) {
@@ -299,7 +182,12 @@ void OpenGLState::ApplyCulling() const {
}
}
-void OpenGLState::ApplyColorMask() const {
+void OpenGLState::ApplyColorMask() {
+ if (!dirty.color_mask) {
+ return;
+ }
+ dirty.color_mask = false;
+
for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
const auto& updated = color_mask[i];
auto& current = cur_state.color_mask[i];
@@ -314,7 +202,7 @@ void OpenGLState::ApplyColorMask() const {
}
}
-void OpenGLState::ApplyDepth() const {
+void OpenGLState::ApplyDepth() {
Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
if (cur_state.depth.test_func != depth.test_func) {
@@ -328,7 +216,7 @@ void OpenGLState::ApplyDepth() const {
}
}
-void OpenGLState::ApplyPrimitiveRestart() const {
+void OpenGLState::ApplyPrimitiveRestart() {
Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
if (cur_state.primitive_restart.index != primitive_restart.index) {
@@ -337,7 +225,12 @@ void OpenGLState::ApplyPrimitiveRestart() const {
}
}
-void OpenGLState::ApplyStencilTest() const {
+void OpenGLState::ApplyStencilTest() {
+ if (!dirty.stencil_state) {
+ return;
+ }
+ dirty.stencil_state = false;
+
Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
@@ -366,7 +259,7 @@ void OpenGLState::ApplyStencilTest() const {
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
-void OpenGLState::ApplyViewport() const {
+void OpenGLState::ApplyViewport() {
for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
const auto& updated = viewports[i];
auto& current = cur_state.viewports[i];
@@ -403,7 +296,7 @@ void OpenGLState::ApplyViewport() const {
}
}
-void OpenGLState::ApplyGlobalBlending() const {
+void OpenGLState::ApplyGlobalBlending() {
const Blend& updated = blend[0];
Blend& current = cur_state.blend[0];
@@ -427,7 +320,7 @@ void OpenGLState::ApplyGlobalBlending() const {
}
}
-void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
+void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) {
const Blend& updated = blend[target];
Blend& current = cur_state.blend[target];
@@ -451,7 +344,12 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
}
}
-void OpenGLState::ApplyBlending() const {
+void OpenGLState::ApplyBlending() {
+ if (!dirty.blend_state) {
+ return;
+ }
+ dirty.blend_state = false;
+
if (independant_blend.enabled) {
const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
@@ -470,7 +368,7 @@ void OpenGLState::ApplyBlending() const {
}
}
-void OpenGLState::ApplyLogicOp() const {
+void OpenGLState::ApplyLogicOp() {
Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
@@ -478,7 +376,12 @@ void OpenGLState::ApplyLogicOp() const {
}
}
-void OpenGLState::ApplyPolygonOffset() const {
+void OpenGLState::ApplyPolygonOffset() {
+ if (!dirty.polygon_offset) {
+ return;
+ }
+ dirty.polygon_offset = false;
+
Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
polygon_offset.fill_enable);
Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
@@ -499,7 +402,7 @@ void OpenGLState::ApplyPolygonOffset() const {
}
}
-void OpenGLState::ApplyAlphaTest() const {
+void OpenGLState::ApplyAlphaTest() {
Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
std::tie(alpha_test.func, alpha_test.ref))) {
@@ -507,19 +410,25 @@ void OpenGLState::ApplyAlphaTest() const {
}
}
-void OpenGLState::ApplyTextures() const {
+void OpenGLState::ApplyClipControl() {
+ if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
+ glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
+ }
+}
+
+void OpenGLState::ApplyTextures() {
if (const auto update = UpdateArray(cur_state.textures, textures)) {
glBindTextures(update->first, update->second, textures.data() + update->first);
}
}
-void OpenGLState::ApplySamplers() const {
+void OpenGLState::ApplySamplers() {
if (const auto update = UpdateArray(cur_state.samplers, samplers)) {
glBindSamplers(update->first, update->second, samplers.data() + update->first);
}
}
-void OpenGLState::ApplyImages() const {
+void OpenGLState::ApplyImages() {
if (const auto update = UpdateArray(cur_state.images, images)) {
glBindImageTextures(update->first, update->second, images.data() + update->first);
}
@@ -535,33 +444,22 @@ void OpenGLState::Apply() {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
- if (dirty.color_mask) {
- ApplyColorMask();
- dirty.color_mask = false;
- }
+ ApplyColorMask();
ApplyDepthClamp();
ApplyViewport();
- if (dirty.stencil_state) {
- ApplyStencilTest();
- dirty.stencil_state = false;
- }
+ ApplyStencilTest();
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
- if (dirty.blend_state) {
- ApplyBlending();
- dirty.blend_state = false;
- }
+ ApplyBlending();
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
ApplyImages();
- if (dirty.polygon_offset) {
- ApplyPolygonOffset();
- dirty.polygon_offset = false;
- }
+ ApplyPolygonOffset();
ApplyAlphaTest();
+ ApplyClipControl();
}
void OpenGLState::EmulateViewportWithScissor() {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index c358d3b38..eaff22bda 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -5,168 +5,150 @@
#pragma once
#include <array>
+#include <type_traits>
#include <glad/glad.h>
#include "video_core/engines/maxwell_3d.h"
namespace OpenGL {
-namespace TextureUnits {
-
-struct TextureUnit {
- GLint id;
- constexpr GLenum Enum() const {
- return static_cast<GLenum>(GL_TEXTURE0 + id);
- }
-};
-
-constexpr TextureUnit MaxwellTexture(int unit) {
- return TextureUnit{unit};
-}
-
-constexpr TextureUnit LightingLUT{3};
-constexpr TextureUnit FogLUT{4};
-constexpr TextureUnit ProcTexNoiseLUT{5};
-constexpr TextureUnit ProcTexColorMap{6};
-constexpr TextureUnit ProcTexAlphaMap{7};
-constexpr TextureUnit ProcTexLUT{8};
-constexpr TextureUnit ProcTexDiffLUT{9};
-
-} // namespace TextureUnits
-
class OpenGLState {
public:
struct {
- bool enabled; // GL_FRAMEBUFFER_SRGB
+ bool enabled = false; // GL_FRAMEBUFFER_SRGB
} framebuffer_srgb;
struct {
- bool alpha_to_coverage; // GL_ALPHA_TO_COVERAGE
- bool alpha_to_one; // GL_ALPHA_TO_ONE
+ bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE
+ bool alpha_to_one = false; // GL_ALPHA_TO_ONE
} multisample_control;
struct {
- bool enabled; // GL_CLAMP_FRAGMENT_COLOR_ARB
+ bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB
} fragment_color_clamp;
struct {
- bool far_plane;
- bool near_plane;
+ bool far_plane = false;
+ bool near_plane = false;
} depth_clamp; // GL_DEPTH_CLAMP
struct {
- bool enabled; // GL_CULL_FACE
- GLenum mode; // GL_CULL_FACE_MODE
- GLenum front_face; // GL_FRONT_FACE
+ bool enabled = false; // GL_CULL_FACE
+ GLenum mode = GL_BACK; // GL_CULL_FACE_MODE
+ GLenum front_face = GL_CCW; // GL_FRONT_FACE
} cull;
struct {
- bool test_enabled; // GL_DEPTH_TEST
- GLenum test_func; // GL_DEPTH_FUNC
- GLboolean write_mask; // GL_DEPTH_WRITEMASK
+ bool test_enabled = false; // GL_DEPTH_TEST
+ GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK
+ GLenum test_func = GL_LESS; // GL_DEPTH_FUNC
} depth;
struct {
- bool enabled;
- GLuint index;
+ bool enabled = false;
+ GLuint index = 0;
} primitive_restart; // GL_PRIMITIVE_RESTART
struct ColorMask {
- GLboolean red_enabled;
- GLboolean green_enabled;
- GLboolean blue_enabled;
- GLboolean alpha_enabled;
+ GLboolean red_enabled = GL_TRUE;
+ GLboolean green_enabled = GL_TRUE;
+ GLboolean blue_enabled = GL_TRUE;
+ GLboolean alpha_enabled = GL_TRUE;
};
std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_mask; // GL_COLOR_WRITEMASK
struct {
- bool test_enabled; // GL_STENCIL_TEST
+ bool test_enabled = false; // GL_STENCIL_TEST
struct {
- GLenum test_func; // GL_STENCIL_FUNC
- GLint test_ref; // GL_STENCIL_REF
- GLuint test_mask; // GL_STENCIL_VALUE_MASK
- GLuint write_mask; // GL_STENCIL_WRITEMASK
- GLenum action_stencil_fail; // GL_STENCIL_FAIL
- GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL
- GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS
+ GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC
+ GLint test_ref = 0; // GL_STENCIL_REF
+ GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK
+ GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK
+ GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL
+ GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL
+ GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS
} front, back;
} stencil;
struct Blend {
- bool enabled; // GL_BLEND
- GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
- GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
- GLenum src_rgb_func; // GL_BLEND_SRC_RGB
- GLenum dst_rgb_func; // GL_BLEND_DST_RGB
- GLenum src_a_func; // GL_BLEND_SRC_ALPHA
- GLenum dst_a_func; // GL_BLEND_DST_ALPHA
+ bool enabled = false; // GL_BLEND
+ GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB
+ GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA
+ GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB
+ GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB
+ GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA
+ GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA
};
std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
struct {
- bool enabled;
+ bool enabled = false;
} independant_blend;
struct {
- GLclampf red;
- GLclampf green;
- GLclampf blue;
- GLclampf alpha;
+ GLclampf red = 0.0f;
+ GLclampf green = 0.0f;
+ GLclampf blue = 0.0f;
+ GLclampf alpha = 0.0f;
} blend_color; // GL_BLEND_COLOR
struct {
- bool enabled; // GL_LOGIC_OP_MODE
- GLenum operation;
+ bool enabled = false; // GL_LOGIC_OP_MODE
+ GLenum operation = GL_COPY;
} logic_op;
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {};
struct {
- GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
- GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
- GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
- GLuint shader_program; // GL_CURRENT_PROGRAM
- GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
+ GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
+ GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING
+ GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING
+ GLuint shader_program = 0; // GL_CURRENT_PROGRAM
+ GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING
} draw;
- struct viewport {
- GLint x;
- GLint y;
- GLint width;
- GLint height;
- GLfloat depth_range_near; // GL_DEPTH_RANGE
- GLfloat depth_range_far; // GL_DEPTH_RANGE
+ struct Viewport {
+ GLint x = 0;
+ GLint y = 0;
+ GLint width = 0;
+ GLint height = 0;
+ GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE
+ GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE
struct {
- bool enabled; // GL_SCISSOR_TEST
- GLint x;
- GLint y;
- GLsizei width;
- GLsizei height;
+ bool enabled = false; // GL_SCISSOR_TEST
+ GLint x = 0;
+ GLint y = 0;
+ GLsizei width = 0;
+ GLsizei height = 0;
} scissor;
};
- std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
+ std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
struct {
- float size; // GL_POINT_SIZE
+ float size = 1.0f; // GL_POINT_SIZE
} point;
struct {
- bool point_enable;
- bool line_enable;
- bool fill_enable;
- GLfloat units;
- GLfloat factor;
- GLfloat clamp;
+ bool point_enable = false;
+ bool line_enable = false;
+ bool fill_enable = false;
+ GLfloat units = 0.0f;
+ GLfloat factor = 0.0f;
+ GLfloat clamp = 0.0f;
} polygon_offset;
struct {
- bool enabled; // GL_ALPHA_TEST
- GLenum func; // GL_ALPHA_TEST_FUNC
- GLfloat ref; // GL_ALPHA_TEST_REF
+ bool enabled = false; // GL_ALPHA_TEST
+ GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC
+ GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF
} alpha_test;
- std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE
+ std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
+
+ struct {
+ GLenum origin = GL_LOWER_LEFT;
+ } clip_control;
OpenGLState();
@@ -179,34 +161,32 @@ public:
/// Apply this state as the current OpenGL state
void Apply();
- void ApplyFramebufferState() const;
- void ApplyVertexArrayState() const;
- void ApplyShaderProgram() const;
- void ApplyProgramPipeline() const;
- void ApplyClipDistances() const;
- void ApplyPointSize() const;
- void ApplyFragmentColorClamp() const;
- void ApplyMultisample() const;
- void ApplySRgb() const;
- void ApplyCulling() const;
- void ApplyColorMask() const;
- void ApplyDepth() const;
- void ApplyPrimitiveRestart() const;
- void ApplyStencilTest() const;
- void ApplyViewport() const;
- void ApplyTargetBlending(std::size_t target, bool force) const;
- void ApplyGlobalBlending() const;
- void ApplyBlending() const;
- void ApplyLogicOp() const;
- void ApplyTextures() const;
- void ApplySamplers() const;
- void ApplyImages() const;
- void ApplyDepthClamp() const;
- void ApplyPolygonOffset() const;
- void ApplyAlphaTest() const;
-
- /// Set the initial OpenGL state
- static void ApplyDefaultState();
+ void ApplyFramebufferState();
+ void ApplyVertexArrayState();
+ void ApplyShaderProgram();
+ void ApplyProgramPipeline();
+ void ApplyClipDistances();
+ void ApplyPointSize();
+ void ApplyFragmentColorClamp();
+ void ApplyMultisample();
+ void ApplySRgb();
+ void ApplyCulling();
+ void ApplyColorMask();
+ void ApplyDepth();
+ void ApplyPrimitiveRestart();
+ void ApplyStencilTest();
+ void ApplyViewport();
+ void ApplyTargetBlending(std::size_t target, bool force);
+ void ApplyGlobalBlending();
+ void ApplyBlending();
+ void ApplyLogicOp();
+ void ApplyTextures();
+ void ApplySamplers();
+ void ApplyImages();
+ void ApplyDepthClamp();
+ void ApplyPolygonOffset();
+ void ApplyAlphaTest();
+ void ApplyClipControl();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
@@ -253,5 +233,6 @@ private:
bool color_mask;
} dirty{};
};
+static_assert(std::is_trivially_copyable_v<OpenGLState>);
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 173b76c4e..4659e098f 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -23,7 +23,6 @@ namespace OpenGL {
using Tegra::Texture::SwizzleSource;
using VideoCore::MortonSwizzleMode;
-using VideoCore::Surface::ComponentType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceCompression;
using VideoCore::Surface::SurfaceTarget;
@@ -40,102 +39,95 @@ struct FormatTuple {
GLint internal_format;
GLenum format;
GLenum type;
- ComponentType component_type;
bool compressed;
};
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
- {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
- false}, // A2B10G10R10U
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
- false}, // R11FG11FB10F
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT1
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT23
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT45
- {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
- {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXN2UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // BC7U
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
- true}, // BC6H_UF16
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
- true}, // BC6H_SF16
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
- {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
- {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
- {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
- {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
- {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
- {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
- {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
- {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
- {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
- {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
- false}, // RGBA8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
- {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
- {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBX16F
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
- {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
+ {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
+ {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
+ {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
+ {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F
+ {GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F
+ {GL_R32F, GL_RED, GL_FLOAT, false}, // R32F
+ {GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U
+ {GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I
+ {GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S
+ {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
+ {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
+ {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
+ {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8
// Compressed sRGB formats
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT1_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT23_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // DXT45_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
- true}, // BC7U_SRGB
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F
// Depth formats
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
- false}, // Z16
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16
// DepthStencil formats
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
- false}, // Z24S8
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
- false}, // S8Z24
- {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
- ComponentType::Float, false}, // Z32FS8
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24
+ {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8
}};
-const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
+const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
return format;
@@ -237,7 +229,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
: VideoCommon::SurfaceBase<View>(gpu_addr, params) {
- const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
+ const auto& tuple{GetFormatTuple(params.pixel_format)};
internal_format = tuple.internal_format;
format = tuple.format;
type = tuple.type;
@@ -439,8 +431,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
texture_view.Create();
const GLuint handle{texture_view.handle};
- const FormatTuple& tuple{
- GetFormatTuple(owner_params.pixel_format, owner_params.component_type)};
+ const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)};
glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
params.num_levels, params.base_layer, params.num_layers);
@@ -550,8 +541,8 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
const auto& dst_params = dst_surface->GetSurfaceParams();
UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
- const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
- const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
+ const auto source_format = GetFormatTuple(src_params.pixel_format);
+ const auto dest_format = GetFormatTuple(dst_params.pixel_format);
const std::size_t source_size = src_surface->GetHostSizeInBytes();
const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1e6ef66ab..7646cbb0e 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- system.GetPerfStats().EndSystemFrame();
-
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.AllDirty();
@@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
render_window.PollEvents();
- system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
- system.GetPerfStats().BeginSystemFrame();
-
// Restore the rasterizer state
prev_state.AllDirty();
prev_state.Apply();
@@ -328,10 +323,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
// (e.g. handheld mode) on a 1920x1080 framebuffer.
f32 scale_u = 1.f, scale_v = 1.f;
if (framebuffer_crop_rect.GetWidth() > 0) {
- scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / screen_info.texture.width;
+ scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+ static_cast<f32>(screen_info.texture.width);
}
if (framebuffer_crop_rect.GetHeight() > 0) {
- scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / screen_info.texture.height;
+ scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+ static_cast<f32>(screen_info.texture.height);
}
std::array<ScreenRectVertex, 4> vertices = {{
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 3c5acda3e..463ed43ae 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -95,83 +95,82 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar
} // namespace Sampler
struct FormatTuple {
- vk::Format format; ///< Vulkan format
- ComponentType component_type; ///< Abstracted component type
- bool attachable; ///< True when this format can be used as an attachment
+ vk::Format format; ///< Vulkan format
+ bool attachable; ///< True when this format can be used as an attachment
};
static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
- {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI
- {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U
- {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U
- {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI
- {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1
- {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23
- {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45
- {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F
- {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBX16F
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4
+ {vk::Format::eA8B8G8R8UnormPack32, true}, // ABGR8U
+ {vk::Format::eUndefined, false}, // ABGR8S
+ {vk::Format::eUndefined, false}, // ABGR8UI
+ {vk::Format::eB5G6R5UnormPack16, false}, // B5G6R5U
+ {vk::Format::eA2B10G10R10UnormPack32, true}, // A2B10G10R10U
+ {vk::Format::eUndefined, false}, // A1B5G5R5U
+ {vk::Format::eR8Unorm, true}, // R8U
+ {vk::Format::eUndefined, false}, // R8UI
+ {vk::Format::eUndefined, false}, // RGBA16F
+ {vk::Format::eUndefined, false}, // RGBA16U
+ {vk::Format::eUndefined, false}, // RGBA16UI
+ {vk::Format::eUndefined, false}, // R11FG11FB10F
+ {vk::Format::eUndefined, false}, // RGBA32UI
+ {vk::Format::eBc1RgbaUnormBlock, false}, // DXT1
+ {vk::Format::eBc2UnormBlock, false}, // DXT23
+ {vk::Format::eBc3UnormBlock, false}, // DXT45
+ {vk::Format::eBc4UnormBlock, false}, // DXN1
+ {vk::Format::eUndefined, false}, // DXN2UNORM
+ {vk::Format::eUndefined, false}, // DXN2SNORM
+ {vk::Format::eUndefined, false}, // BC7U
+ {vk::Format::eUndefined, false}, // BC6H_UF16
+ {vk::Format::eUndefined, false}, // BC6H_SF16
+ {vk::Format::eUndefined, false}, // ASTC_2D_4X4
+ {vk::Format::eUndefined, false}, // BGRA8
+ {vk::Format::eUndefined, false}, // RGBA32F
+ {vk::Format::eUndefined, false}, // RG32F
+ {vk::Format::eUndefined, false}, // R32F
+ {vk::Format::eUndefined, false}, // R16F
+ {vk::Format::eUndefined, false}, // R16U
+ {vk::Format::eUndefined, false}, // R16S
+ {vk::Format::eUndefined, false}, // R16UI
+ {vk::Format::eUndefined, false}, // R16I
+ {vk::Format::eUndefined, false}, // RG16
+ {vk::Format::eUndefined, false}, // RG16F
+ {vk::Format::eUndefined, false}, // RG16UI
+ {vk::Format::eUndefined, false}, // RG16I
+ {vk::Format::eUndefined, false}, // RG16S
+ {vk::Format::eUndefined, false}, // RGB32F
+ {vk::Format::eA8B8G8R8SrgbPack32, true}, // RGBA8_SRGB
+ {vk::Format::eUndefined, false}, // RG8U
+ {vk::Format::eUndefined, false}, // RG8S
+ {vk::Format::eUndefined, false}, // RG32UI
+ {vk::Format::eUndefined, false}, // RGBX16F
+ {vk::Format::eUndefined, false}, // R32UI
+ {vk::Format::eUndefined, false}, // ASTC_2D_8X8
+ {vk::Format::eUndefined, false}, // ASTC_2D_8X5
+ {vk::Format::eUndefined, false}, // ASTC_2D_5X4
// Compressed sRGB formats
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
+ {vk::Format::eUndefined, false}, // BGRA8_SRGB
+ {vk::Format::eUndefined, false}, // DXT1_SRGB
+ {vk::Format::eUndefined, false}, // DXT23_SRGB
+ {vk::Format::eUndefined, false}, // DXT45_SRGB
+ {vk::Format::eUndefined, false}, // BC7U_SRGB
+ {vk::Format::eUndefined, false}, // ASTC_2D_4X4_SRGB
+ {vk::Format::eUndefined, false}, // ASTC_2D_8X8_SRGB
+ {vk::Format::eUndefined, false}, // ASTC_2D_8X5_SRGB
+ {vk::Format::eUndefined, false}, // ASTC_2D_5X4_SRGB
+ {vk::Format::eUndefined, false}, // ASTC_2D_5X5
+ {vk::Format::eUndefined, false}, // ASTC_2D_5X5_SRGB
+ {vk::Format::eUndefined, false}, // ASTC_2D_10X8
+ {vk::Format::eUndefined, false}, // ASTC_2D_10X8_SRGB
// Depth formats
- {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
- {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16
+ {vk::Format::eD32Sfloat, true}, // Z32F
+ {vk::Format::eD16Unorm, true}, // Z16
// DepthStencil formats
- {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
- {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
- {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8
+ {vk::Format::eD24UnormS8Uint, true}, // Z24S8
+ {vk::Format::eD24UnormS8Uint, true}, // S8Z24 (emulated)
+ {vk::Format::eUndefined, false}, // Z32FS8
}};
static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
@@ -180,14 +179,13 @@ static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
}
std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
- PixelFormat pixel_format, ComponentType component_type) {
+ PixelFormat pixel_format) {
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
- "Unimplemented texture format with pixel format={} and component type={}",
- static_cast<u32>(pixel_format), static_cast<u32>(component_type));
- ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
+ "Unimplemented texture format with pixel format={}",
+ static_cast<u32>(pixel_format));
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 4cadc0721..5b0ffd87a 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -16,7 +16,6 @@ namespace Vulkan::MaxwellToVK {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
-using ComponentType = VideoCore::Surface::ComponentType;
namespace Sampler {
@@ -31,7 +30,7 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar
} // namespace Sampler
std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
- PixelFormat pixel_format, ComponentType component_type);
+ PixelFormat pixel_format);
vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 77fc58f25..2850d5b59 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -88,6 +88,9 @@ bool IsPrecise(Operation operand) {
} // namespace
+class ASTDecompiler;
+class ExprDecompiler;
+
class SPIRVDecompiler : public Sirit::Module {
public:
explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage)
@@ -97,27 +100,7 @@ public:
AddExtension("SPV_KHR_variable_pointers");
}
- void Decompile() {
- AllocateBindings();
- AllocateLabels();
-
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareRegisters();
- DeclarePredicates();
- DeclareLocalMemory();
- DeclareInternalFlags();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareConstantBuffers();
- DeclareGlobalBuffers();
- DeclareSamplers();
-
- execute_function =
- Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void)));
- Emit(OpLabel());
-
+ void DecompileBranchMode() {
const u32 first_address = ir.GetBasicBlocks().begin()->first;
const Id loop_label = OpLabel("loop");
const Id merge_label = OpLabel("merge");
@@ -174,6 +157,43 @@ public:
Emit(continue_label);
Emit(OpBranch(loop_label));
Emit(merge_label);
+ }
+
+ void DecompileAST();
+
+ void Decompile() { // Emits all declarations, then the body of the execute function.
+ const bool is_fully_decompiled = ir.IsDecompiled(); // Selects AST mode over branch mode below.
+ AllocateBindings();
+ if (!is_fully_decompiled) {
+ AllocateLabels(); // Skipped in AST mode.
+ }
+
+ DeclareVertex();
+ DeclareGeometry();
+ DeclareFragment();
+ DeclareRegisters();
+ DeclarePredicates();
+ if (is_fully_decompiled) {
+ DeclareFlowVariables(); // Only the AST path gets flow variables.
+ }
+ DeclareLocalMemory();
+ DeclareInternalFlags();
+ DeclareInputAttributes();
+ DeclareOutputAttributes();
+ DeclareConstantBuffers();
+ DeclareGlobalBuffers();
+ DeclareSamplers();
+
+ execute_function = // Opens the void function that will hold the shader body.
+ Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void)));
+ Emit(OpLabel());
+
+ if (is_fully_decompiled) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
+ }
+
Emit(OpReturn()); // Both modes fall through to this final return.
Emit(OpFunctionEnd());
}
@@ -206,6 +226,9 @@ public:
}
private:
+ friend class ASTDecompiler;
+ friend class ExprDecompiler;
+
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
void AllocateBindings() {
@@ -294,6 +317,14 @@ private:
}
}
+    /// Declares one private boolean variable, initialised to false, for each AST flow variable
+    /// reported by the IR and records it in flow_variables under its index.
+    void DeclareFlowVariables() {
+        const u32 num_flow_variables = ir.GetASTNumVariables();
+        for (u32 index = 0; index < num_flow_variables; ++index) {
+            const Id variable = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
+            Name(variable, fmt::format("flow_var_{}", index));
+            flow_variables.emplace(index, AddGlobalVariable(variable));
+        }
+    }
+
void DeclareLocalMemory() {
if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
const auto element_count = static_cast<u32>(Common::AlignUp(local_memory_size, 4) / 4);
@@ -615,9 +646,15 @@ private:
Emit(OpBranchConditional(condition, true_label, skip_label));
Emit(true_label);
+ ++conditional_nest_count;
VisitBasicBlock(conditional->GetCode());
+ --conditional_nest_count;
- Emit(OpBranch(skip_label));
+ if (inside_branch == 0) {
+ Emit(OpBranch(skip_label));
+ } else {
+ inside_branch--;
+ }
Emit(skip_label);
return {};
@@ -746,6 +783,11 @@ private:
return {};
}
+ Id FSwizzleAdd(Operation operation) { // Stub: reports UNIMPLEMENTED and returns an empty Id.
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id HNegate(Operation operation) {
UNIMPLEMENTED();
return {};
@@ -980,7 +1022,11 @@ private:
UNIMPLEMENTED_IF(!target);
Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue())));
- BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+ Emit(OpBranch(continue_label));
+ inside_branch = conditional_nest_count;
+ if (conditional_nest_count == 0) {
+ Emit(OpLabel());
+ }
return {};
}
@@ -988,7 +1034,11 @@ private:
const Id op_a = VisitOperand<Type::Uint>(operation, 0);
Emit(OpStore(jmp_to, op_a));
- BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+ Emit(OpBranch(continue_label));
+ inside_branch = conditional_nest_count;
+ if (conditional_nest_count == 0) {
+ Emit(OpLabel());
+ }
return {};
}
@@ -1015,11 +1065,15 @@ private:
Emit(OpStore(flow_stack_top, previous));
Emit(OpStore(jmp_to, target));
- BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+ Emit(OpBranch(continue_label));
+ inside_branch = conditional_nest_count;
+ if (conditional_nest_count == 0) {
+ Emit(OpLabel());
+ }
return {};
}
- Id Exit(Operation operation) {
+ Id PreExit() {
switch (stage) {
case ShaderStage::Vertex: {
// TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
@@ -1067,12 +1121,35 @@ private:
}
}
- BranchingOp([&]() { Emit(OpReturn()); });
+ return {};
+ }
+
+ Id Exit(Operation operation) { // Runs PreExit(), then emits the return for the shader exit.
+ PreExit();
+ inside_branch = conditional_nest_count; // Records the current conditional nesting depth.
+ if (conditional_nest_count > 0) {
+ Emit(OpReturn()); // Inside a conditional: return directly from the current block.
+ } else {
+ const Id dummy = OpLabel(); // Top level: branch into a fresh block and return from it.
+ Emit(OpBranch(dummy));
+ Emit(dummy);
+ Emit(OpReturn());
+ Emit(OpLabel()); // Opens a new block for whatever is emitted after the return.
+ }
return {};
}
Id Discard(Operation operation) {
- BranchingOp([&]() { Emit(OpKill()); });
+ inside_branch = conditional_nest_count;
+ if (conditional_nest_count > 0) {
+ Emit(OpKill());
+ } else {
+ const Id dummy = OpLabel();
+ Emit(OpBranch(dummy));
+ Emit(dummy);
+ Emit(OpKill());
+ Emit(OpLabel());
+ }
return {};
}
@@ -1123,42 +1200,12 @@ private:
return {};
}
- Id ShuffleIndexed(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleUp(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleDown(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleButterfly(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleIndexed(Operation) {
+ Id ThreadId(Operation) { // Stub: reports UNIMPLEMENTED and returns an empty Id.
UNIMPLEMENTED();
return {};
}
- Id InRangeShuffleUp(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleDown(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleButterfly(Operation) {
+ Id ShuffleIndexed(Operation) { // Stub: reports UNIMPLEMENTED and returns an empty Id.
UNIMPLEMENTED();
return {};
}
@@ -1267,17 +1314,6 @@ private:
return {};
}
- void BranchingOp(std::function<void()> call) {
- const Id true_label = OpLabel();
- const Id skip_label = OpLabel();
- Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::Flatten));
- Emit(OpBranchConditional(v_true, true_label, skip_label, 1, 0));
- Emit(true_label);
- call();
-
- Emit(skip_label);
- }
-
std::tuple<Id, Id> CreateFlowStack() {
// TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
// that shaders will use 20 nested SSYs and PBKs.
@@ -1332,6 +1368,7 @@ private:
&SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
&SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
&SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
+ &SPIRVDecompiler::FSwizzleAdd,
&SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
&SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
@@ -1467,15 +1504,8 @@ private:
&SPIRVDecompiler::VoteAny,
&SPIRVDecompiler::VoteEqual,
+ &SPIRVDecompiler::ThreadId,
&SPIRVDecompiler::ShuffleIndexed,
- &SPIRVDecompiler::ShuffleUp,
- &SPIRVDecompiler::ShuffleDown,
- &SPIRVDecompiler::ShuffleButterfly,
-
- &SPIRVDecompiler::InRangeShuffleIndexed,
- &SPIRVDecompiler::InRangeShuffleUp,
- &SPIRVDecompiler::InRangeShuffleDown,
- &SPIRVDecompiler::InRangeShuffleButterfly,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -1483,6 +1513,8 @@ private:
const ShaderIR& ir;
const ShaderStage stage;
const Tegra::Shader::Header header;
+ u64 conditional_nest_count{};
+ u64 inside_branch{};
const Id t_void = Name(TypeVoid(), "void");
@@ -1545,6 +1577,7 @@ private:
Id per_vertex{};
std::map<u32, Id> registers;
std::map<Tegra::Shader::Pred, Id> predicates;
+ std::map<u32, Id> flow_variables;
Id local_memory{};
std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
std::map<Attribute::Index, Id> input_attributes;
@@ -1580,6 +1613,235 @@ private:
std::map<u32, Id> labels;
};
+/// Visitor that lowers a control-flow condition expression (Expr) to a SPIR-V boolean value.
+/// All instructions are emitted through the SPIRVDecompiler passed to the constructor.
+class ExprDecompiler {
+public:
+    explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
+
+    /// Logical AND of both operands.
+    Id operator()(const ExprAnd& expr) {
+        const Id type_def = decomp.GetTypeDefinition(Type::Bool);
+        const Id op1 = Visit(expr.operand1);
+        const Id op2 = Visit(expr.operand2);
+        return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2));
+    }
+
+    /// Logical OR of both operands.
+    Id operator()(const ExprOr& expr) {
+        const Id type_def = decomp.GetTypeDefinition(Type::Bool);
+        const Id op1 = Visit(expr.operand1);
+        const Id op2 = Visit(expr.operand2);
+        return decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2));
+    }
+
+    /// Logical negation of the operand.
+    Id operator()(const ExprNot& expr) {
+        const Id type_def = decomp.GetTypeDefinition(Type::Bool);
+        const Id op1 = Visit(expr.operand1);
+        return decomp.Emit(decomp.OpLogicalNot(type_def, op1));
+    }
+
+    /// Loads the current value of the referenced predicate variable.
+    Id operator()(const ExprPredicate& expr) {
+        const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
+        return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)));
+    }
+
+    /// Resolves a condition code to the predicate or internal flag that backs it.
+    Id operator()(const ExprCondCode& expr) {
+        const Node cc = decomp.ir.GetConditionCode(expr.cc);
+
+        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
+            const auto index = pred->GetIndex();
+            switch (index) {
+            case Tegra::Shader::Pred::NeverExecute:
+                // v_false and v_true are constants, not pointers, so they are returned as-is
+                // instead of being fed to OpLoad.
+                return decomp.v_false;
+            case Tegra::Shader::Pred::UnusedIndex:
+                return decomp.v_true;
+            default:
+                return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(index)));
+            }
+        }
+        if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
+            const Id target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
+            return decomp.Emit(decomp.OpLoad(decomp.t_bool, target));
+        }
+
+        // Any other node kind has no variable to load from.
+        UNREACHABLE();
+        return {};
+    }
+
+    /// Loads the current value of an AST flow variable.
+    Id operator()(const ExprVar& expr) {
+        return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)));
+    }
+
+    /// Maps a literal boolean to the matching SPIR-V constant.
+    Id operator()(const ExprBoolean& expr) {
+        return expr.value ? decomp.v_true : decomp.v_false;
+    }
+
+    /// Compares a general purpose register, reinterpreted as an unsigned integer, against an
+    /// immediate value.
+    Id operator()(const ExprGprEqual& expr) {
+        const Id target = decomp.Constant(decomp.t_uint, expr.value);
+        const Id gpr = decomp.BitcastTo<Type::Uint>(
+            decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr))));
+        // Integer operands need OpIEqual, and a comparison always produces a boolean result;
+        // OpLogicalEqual only accepts boolean operands.
+        return decomp.Emit(decomp.OpIEqual(decomp.t_bool, gpr, target));
+    }
+
+    /// Dispatches on the concrete expression type held by node.
+    Id Visit(const Expr& node) {
+        return std::visit(*this, *node);
+    }
+
+private:
+    SPIRVDecompiler& decomp;
+};
+
+/// Visitor that walks the structured AST of a fully decompiled shader and emits the matching
+/// SPIR-V structured control flow through the SPIRVDecompiler passed to the constructor.
+class ASTDecompiler {
+public:
+    explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
+
+    /// Visits every top-level node of the program in order.
+    void operator()(const ASTProgram& ast) {
+        for (ASTNode current = ast.nodes.GetFirst(); current; current = current->GetNext()) {
+            Visit(current);
+        }
+    }
+
+    /// Emits a selection construct whose body is the children of the if-then node.
+    void operator()(const ASTIfThen& ast) {
+        ExprDecompiler expr_parser{decomp};
+        const Id condition = expr_parser.Visit(ast.condition);
+        const Id then_label = decomp.OpLabel();
+        const Id endif_label = decomp.OpLabel();
+        decomp.Emit(decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone));
+        decomp.Emit(decomp.OpBranchConditional(condition, then_label, endif_label));
+        decomp.Emit(then_label);
+        for (ASTNode current = ast.nodes.GetFirst(); current; current = current->GetNext()) {
+            Visit(current);
+        }
+        decomp.Emit(decomp.OpBranch(endif_label));
+        decomp.Emit(endif_label);
+    }
+
+    void operator()([[maybe_unused]] const ASTIfElse& ast) {
+        UNREACHABLE();
+    }
+
+    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
+        UNREACHABLE();
+    }
+
+    /// Emits the instructions of a decoded basic block.
+    void operator()(const ASTBlockDecoded& ast) {
+        decomp.VisitBasicBlock(ast.nodes);
+    }
+
+    /// Stores the evaluated condition into the flow variable selected by the node.
+    void operator()(const ASTVarSet& ast) {
+        ExprDecompiler expr_parser{decomp};
+        const Id condition = expr_parser.Visit(ast.condition);
+        decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition));
+    }
+
+    void operator()([[maybe_unused]] const ASTLabel& ast) {
+        // Do nothing
+    }
+
+    void operator()([[maybe_unused]] const ASTGoto& ast) {
+        UNREACHABLE();
+    }
+
+    /// Emits a do-while loop: header, body, continue block holding the condition, merge block.
+    void operator()(const ASTDoWhile& ast) {
+        const Id loop_label = decomp.OpLabel();
+        const Id endloop_label = decomp.OpLabel();
+        const Id loop_start_block = decomp.OpLabel();
+        const Id loop_continue_block = decomp.OpLabel();
+
+        // Keep the enclosing loop's exit so that a break placed after a nested loop still
+        // leaves the loop it belongs to.
+        const Id previous_loop_exit = current_loop_exit;
+        current_loop_exit = endloop_label;
+
+        decomp.Emit(decomp.OpBranch(loop_label));
+        decomp.Emit(loop_label);
+        decomp.Emit(decomp.OpLoopMerge(endloop_label, loop_continue_block,
+                                       spv::LoopControlMask::MaskNone));
+        decomp.Emit(decomp.OpBranch(loop_start_block));
+        decomp.Emit(loop_start_block);
+        for (ASTNode current = ast.nodes.GetFirst(); current; current = current->GetNext()) {
+            Visit(current);
+        }
+
+        // The continue target named by OpLoopMerge must exist as a block; it evaluates the loop
+        // condition and holds the back edge to the header.
+        decomp.Emit(decomp.OpBranch(loop_continue_block));
+        decomp.Emit(loop_continue_block);
+        ExprDecompiler expr_parser{decomp};
+        const Id condition = expr_parser.Visit(ast.condition);
+        decomp.Emit(decomp.OpBranchConditional(condition, loop_label, endloop_label));
+        decomp.Emit(endloop_label);
+
+        current_loop_exit = previous_loop_exit;
+    }
+
+    /// Emits a return or kill, guarded by a selection unless the condition is trivially true.
+    void operator()(const ASTReturn& ast) {
+        if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
+            ExprDecompiler expr_parser{decomp};
+            const Id condition = expr_parser.Visit(ast.condition);
+            const Id then_label = decomp.OpLabel();
+            const Id endif_label = decomp.OpLabel();
+            decomp.Emit(decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone));
+            decomp.Emit(decomp.OpBranchConditional(condition, then_label, endif_label));
+            decomp.Emit(then_label);
+            EmitExit(ast);
+            decomp.Emit(endif_label);
+        } else {
+            const Id next_block = decomp.OpLabel();
+            decomp.Emit(decomp.OpBranch(next_block));
+            decomp.Emit(next_block);
+            EmitExit(ast);
+            // Open a fresh block for anything emitted after the terminator.
+            decomp.Emit(decomp.OpLabel());
+        }
+    }
+
+    /// Emits a branch to the exit of the current loop, guarded by a selection unless the
+    /// condition is trivially true.
+    void operator()(const ASTBreak& ast) {
+        if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
+            ExprDecompiler expr_parser{decomp};
+            const Id condition = expr_parser.Visit(ast.condition);
+            const Id then_label = decomp.OpLabel();
+            const Id endif_label = decomp.OpLabel();
+            decomp.Emit(decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone));
+            decomp.Emit(decomp.OpBranchConditional(condition, then_label, endif_label));
+            decomp.Emit(then_label);
+            decomp.Emit(decomp.OpBranch(current_loop_exit));
+            decomp.Emit(endif_label);
+        } else {
+            const Id next_block = decomp.OpLabel();
+            decomp.Emit(decomp.OpBranch(next_block));
+            decomp.Emit(next_block);
+            decomp.Emit(decomp.OpBranch(current_loop_exit));
+            // Open a fresh block for anything emitted after the terminator.
+            decomp.Emit(decomp.OpLabel());
+        }
+    }
+
+    /// Dispatches on the concrete node type stored in node.
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+    }
+
+private:
+    /// Emits the terminator of a return node: OpKill when the node kills, otherwise the
+    /// pre-exit sequence followed by OpReturn.
+    void EmitExit(const ASTReturn& ast) {
+        if (ast.kills) {
+            decomp.Emit(decomp.OpKill());
+        } else {
+            decomp.PreExit();
+            decomp.Emit(decomp.OpReturn());
+        }
+    }
+
+    SPIRVDecompiler& decomp;
+    Id current_loop_exit{}; ///< Merge block of the innermost loop being emitted
+};
+
+/// Emits the shader body from the structured AST held by the IR.
+/// Flow variables are not declared here: Decompile() calls DeclareFlowVariables() before this
+/// function whenever the IR is fully decompiled, and declaring them a second time would only
+/// add a duplicate set of global variables to the module.
+void SPIRVDecompiler::DecompileAST() {
+    const ASTNode program = ir.GetASTProgram();
+    ASTDecompiler decompiler{*this};
+    decompiler.Visit(program);
+
+    // Close whatever block the AST left open and start the one that receives the final return.
+    const Id next_block = OpLabel();
+    Emit(OpBranch(next_block));
+    Emit(next_block);
+}
+
DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Maxwell::ShaderStage stage) {
auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage);
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
new file mode 100644
index 000000000..3f96d9076
--- /dev/null
+++ b/src/video_core/shader/ast.cpp
@@ -0,0 +1,753 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/expr.h"
+
+namespace VideoCommon::Shader {
+
+// Defaulted constructor: no members are set here.
+ASTZipper::ASTZipper() = default;
+
+// Adopts an already linked chain of nodes starting at `new_first`: every node in the chain gets
+// this zipper as manager and `parent` as parent, and `last` ends up on the final node.
+// `new_first` must be non-null and must not belong to another zipper.
+void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
+    ASSERT(new_first->manager == nullptr);
+    first = new_first;
+    last = new_first;
+
+    for (ASTNode node = first; node; node = node->next) {
+        node->manager = this;
+        node->parent = parent;
+        last = node;
+    }
+}
+
+// Appends `new_node` at the end of the list. The node must not belong to a zipper yet.
+// Note: the node's parent pointer is left untouched.
+void ASTZipper::PushBack(const ASTNode new_node) {
+    ASSERT(new_node->manager == nullptr);
+
+    // Link the new node behind the current tail (if there is one).
+    new_node->previous = last;
+    new_node->next.reset();
+    if (last) {
+        last->next = new_node;
+    }
+    last = new_node;
+
+    // An empty list also needs its head set.
+    if (!first) {
+        first = new_node;
+    }
+    new_node->manager = this;
+}
+
+// Prepends `new_node` to the list. The node must not belong to a zipper yet.
+// Note: the node's parent pointer is left untouched.
+void ASTZipper::PushFront(const ASTNode new_node) {
+    ASSERT(new_node->manager == nullptr);
+    new_node->previous.reset();
+    new_node->next = first;
+    if (first) {
+        first->previous = new_node;
+    }
+    // Only an empty list gets a new tail. The previous check (`last == first`) was also true
+    // for a single-element list and wrongly moved `last` onto the new head.
+    if (!last) {
+        last = new_node;
+    }
+    first = new_node;
+    new_node->manager = this;
+}
+
+// Inserts `new_node` right after `at_node`. A null `at_node` means "insert at the front".
+// The node must not belong to a zipper yet; its parent pointer is left untouched.
+void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
+    ASSERT(new_node->manager == nullptr);
+    if (!at_node) {
+        PushFront(new_node);
+        return;
+    }
+
+    const ASTNode successor = at_node->next;
+
+    // Wire the new node between at_node and its old successor.
+    new_node->previous = at_node;
+    new_node->next = successor;
+    if (successor) {
+        successor->previous = new_node;
+    }
+    if (at_node == last) {
+        last = new_node;
+    }
+    at_node->next = new_node;
+    new_node->manager = this;
+}
+
+// Inserts `new_node` right before `at_node`. A null `at_node` means "append at the back".
+// The node must not belong to a zipper yet; its parent pointer is left untouched.
+void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
+    ASSERT(new_node->manager == nullptr);
+    if (!at_node) {
+        PushBack(new_node);
+        return;
+    }
+
+    const ASTNode predecessor = at_node->previous;
+
+    // Wire the new node between at_node's old predecessor and at_node.
+    new_node->next = at_node;
+    new_node->previous = predecessor;
+    if (predecessor) {
+        predecessor->next = new_node;
+    }
+    if (at_node == first) {
+        first = new_node;
+    }
+    at_node->previous = new_node;
+    new_node->manager = this;
+}
+
+// Cuts the list right before `node`: `node` and everything after it leave this zipper as a
+// still-linked chain whose nodes have no manager and no parent.
+void ASTZipper::DetachTail(ASTNode node) {
+    ASSERT(node->manager == this);
+    if (node == first) {
+        // The whole list is being detached.
+        first.reset();
+        last.reset();
+    } else {
+        last = node->previous;
+        last->next.reset();
+        node->previous.reset();
+    }
+
+    // Release ownership of every detached node. This used to be skipped when `node` was the
+    // head, leaving stale manager/parent pointers that trip the `manager == nullptr` asserts
+    // of Init()/PushBack()/InsertAfter() when the chain is re-attached.
+    for (ASTNode current = std::move(node); current; current = current->next) {
+        current->manager = nullptr;
+        current->parent.reset();
+    }
+}
+
+// Unlinks the inclusive range [start, end] from this zipper. The range stays linked internally
+// but its nodes lose their manager and parent. `end` must be reachable from `start`.
+void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
+    ASSERT(start->manager == this && end->manager == this);
+    if (start == end) {
+        DetachSingle(start);
+        return;
+    }
+
+    const ASTNode before = start->previous;
+    const ASTNode after = end->next;
+
+    // Close the gap left behind by the segment.
+    if (before) {
+        before->next = after;
+    } else {
+        first = after;
+    }
+    if (after) {
+        after->previous = before;
+    } else {
+        last = before;
+    }
+
+    // Isolate the segment's endpoints.
+    start->previous.reset();
+    end->next.reset();
+
+    bool reached_end = false;
+    for (ASTNode node = start; node; node = node->next) {
+        node->manager = nullptr;
+        node->parent.reset();
+        if (node == end) {
+            reached_end = true;
+        }
+    }
+    ASSERT(reached_end);
+}
+
+// Unlinks a single node from this zipper and clears all of its links (next, previous, parent
+// and manager).
+void ASTZipper::DetachSingle(const ASTNode node) {
+    ASSERT(node->manager == this);
+    const ASTNode before = node->previous;
+    const ASTNode after = node->next;
+
+    // Fully isolate the node.
+    node->previous.reset();
+    node->next.reset();
+    node->parent.reset();
+    node->manager = nullptr;
+
+    // Reconnect the neighbours, updating head/tail when the node sat at an end.
+    if (before) {
+        before->next = after;
+    } else {
+        first = after;
+    }
+    if (after) {
+        after->previous = before;
+    } else {
+        last = before;
+    }
+}
+
+// Removes `node` from this zipper. Unlike DetachSingle(), the removed node keeps its own
+// next/previous pointers; only its parent and manager are cleared.
+void ASTZipper::Remove(const ASTNode node) {
+    ASSERT(node->manager == this);
+    const ASTNode successor = node->next;
+    const ASTNode predecessor = node->previous;
+
+    // Bypass the node in both directions.
+    if (predecessor) {
+        predecessor->next = successor;
+    }
+    if (successor) {
+        successor->previous = predecessor;
+    }
+
+    node->manager = nullptr;
+    node->parent.reset();
+
+    // Fix up the list ends when the node was the head and/or the tail.
+    if (node == last) {
+        last = predecessor;
+    }
+    if (node == first) {
+        first = successor;
+    }
+}
+
+// Visitor that renders an expression tree as text; the text accumulates in `inner` and is
+// read back with GetResult().
+class ExprPrinter final {
+public:
+    void operator()(const ExprAnd& expr) {
+        AppendBinary(expr.operand1, " && ", expr.operand2);
+    }
+
+    void operator()(const ExprOr& expr) {
+        AppendBinary(expr.operand1, " || ", expr.operand2);
+    }
+
+    void operator()(const ExprNot& expr) {
+        inner += "!";
+        std::visit(*this, *expr.operand1);
+    }
+
+    void operator()(const ExprPredicate& expr) {
+        inner += "P";
+        inner += std::to_string(expr.predicate);
+    }
+
+    void operator()(const ExprCondCode& expr) {
+        // Condition codes are printed by their numeric value.
+        const u32 code = static_cast<u32>(expr.cc);
+        inner += "CC";
+        inner += std::to_string(code);
+    }
+
+    void operator()(const ExprVar& expr) {
+        inner += "V";
+        inner += std::to_string(expr.var_index);
+    }
+
+    void operator()(const ExprBoolean& expr) {
+        if (expr.value) {
+            inner += "true";
+        } else {
+            inner += "false";
+        }
+    }
+
+    void operator()(const ExprGprEqual& expr) {
+        inner += "( gpr_";
+        inner += std::to_string(expr.gpr);
+        inner += " == ";
+        inner += std::to_string(expr.value);
+        inner += ')';
+    }
+
+    const std::string& GetResult() const {
+        return inner;
+    }
+
+private:
+    // Renders "( <lhs><op><rhs>)" for the two-operand expressions.
+    template <typename Operand>
+    void AppendBinary(const Operand& lhs, const char* op, const Operand& rhs) {
+        inner += "( ";
+        std::visit(*this, *lhs);
+        inner += op;
+        std::visit(*this, *rhs);
+        inner += ')';
+    }
+
+    std::string inner;
+};
+
+// Visitor that pretty-prints an AST as indented pseudo code. The text accumulates in `inner`
+// and is read back with GetResult(); `scope` tracks the current nesting depth.
+class ASTPrinter {
+public:
+    void operator()(const ASTProgram& ast) {
+        scope++;
+        inner += "program {\n";
+        PrintChildren(ast.nodes);
+        inner += "}\n";
+        scope--;
+    }
+
+    void operator()(const ASTIfThen& ast) {
+        const std::string condition = PrintExpr(ast.condition);
+        inner += fmt::format("{}if ({}) {{\n", Indent(), condition);
+        scope++;
+        PrintChildren(ast.nodes);
+        scope--;
+        inner += fmt::format("{}}}\n", Indent());
+    }
+
+    void operator()(const ASTIfElse& ast) {
+        inner += Indent();
+        inner += "else {\n";
+
+        scope++;
+        PrintChildren(ast.nodes);
+        scope--;
+
+        inner += Indent();
+        inner += "}\n";
+    }
+
+    void operator()(const ASTBlockEncoded& ast) {
+        inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
+    }
+
+    void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
+        inner += Indent();
+        inner += "Block;\n";
+    }
+
+    void operator()(const ASTVarSet& ast) {
+        const std::string condition = PrintExpr(ast.condition);
+        inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, condition);
+    }
+
+    // Labels are printed without indentation.
+    void operator()(const ASTLabel& ast) {
+        inner += fmt::format("Label_{}:\n", ast.index);
+    }
+
+    void operator()(const ASTGoto& ast) {
+        const std::string condition = PrintExpr(ast.condition);
+        inner += fmt::format("{}({}) -> goto Label_{};\n", Indent(), condition, ast.label);
+    }
+
+    void operator()(const ASTDoWhile& ast) {
+        const std::string condition = PrintExpr(ast.condition);
+        inner += fmt::format("{}do {{\n", Indent());
+        scope++;
+        PrintChildren(ast.nodes);
+        scope--;
+        inner += fmt::format("{}}} while ({});\n", Indent(), condition);
+    }
+
+    void operator()(const ASTReturn& ast) {
+        const std::string condition = PrintExpr(ast.condition);
+        inner += fmt::format("{}({}) -> {};\n", Indent(), condition,
+                             ast.kills ? "discard" : "exit");
+    }
+
+    void operator()(const ASTBreak& ast) {
+        const std::string condition = PrintExpr(ast.condition);
+        inner += fmt::format("{}({}) -> break;\n", Indent(), condition);
+    }
+
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+    }
+
+    const std::string& GetResult() const {
+        return inner;
+    }
+
+private:
+    // Prints every node of a child list, in order.
+    void PrintChildren(const ASTZipper& nodes) {
+        for (ASTNode current = nodes.GetFirst(); current; current = current->GetNext()) {
+            Visit(current);
+        }
+    }
+
+    // Renders a condition expression with ExprPrinter.
+    template <typename ExprPtr>
+    static std::string PrintExpr(const ExprPtr& expr) {
+        ExprPrinter expr_printer{};
+        std::visit(expr_printer, *expr);
+        return expr_printer.GetResult();
+    }
+
+    // Two spaces per nesting level. The indentation is built on demand, so printing no longer
+    // depends on (or asserts against) the length of a fixed buffer of spaces.
+    std::string Indent() const {
+        return std::string(scope * 2u, ' ');
+    }
+
+    std::string inner{};
+    u32 scope{};
+};
+
+// Returns the pretty-printed form of the tree rooted at `main_node`.
+std::string ASTManager::Print() const {
+    ASTPrinter ast_printer;
+    ast_printer.Visit(main_node);
+    const std::string& text = ast_printer.GetResult();
+    return text;
+}
+
+// Stores the decompilation options. The tree itself is only created by Init().
+ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation)
+    : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {}
+
+// Calls Clear() to tear the tree down before the members are destroyed.
+ASTManager::~ASTManager() {
+ Clear();
+}
+
+// Creates the (parentless) program root node and the shared constant-false expression.
+void ASTManager::Init() {
+    false_condition = MakeExpr<ExprBoolean>(false);
+
+    const ASTNode no_parent{};
+    main_node = ASTBase::Make<ASTProgram>(no_parent);
+    // Cache a direct pointer to the root's payload for the Insert* helpers.
+    program = std::get_if<ASTProgram>(main_node->GetInnerData());
+}
+
+// Registers `address` as a label target. A new address gets the next label index and a
+// (still empty) slot in `labels`; an already known address is ignored.
+void ASTManager::DeclareLabel(u32 address) {
+    const bool is_new = labels_map.emplace(address, labels_count).second;
+    if (!is_new) {
+        return;
+    }
+    ++labels_count;
+    labels.resize(labels_count);
+}
+
+// Appends a label node for `address` to the program and records it in `labels`.
+// NOTE(review): an address that was never declared is default-inserted by operator[] and maps
+// to index 0 - callers presumably always call DeclareLabel() first; confirm.
+void ASTManager::InsertLabel(u32 address) {
+    const u32 label_index = labels_map[address];
+    const ASTNode label_node = ASTBase::Make<ASTLabel>(main_node, label_index);
+    program->nodes.PushBack(label_node);
+    labels[label_index] = label_node;
+}
+
+// Appends a conditional goto targeting the label of `address` to the program and queues it
+// in `gotos` for Decompile().
+void ASTManager::InsertGoto(Expr condition, u32 address) {
+    const u32 label_index = labels_map[address];
+    const ASTNode jump = ASTBase::Make<ASTGoto>(main_node, std::move(condition), label_index);
+    program->nodes.PushBack(jump);
+    gotos.push_back(jump);
+}
+
+// Appends an encoded block holding the given start and end addresses to the program.
+void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
+    program->nodes.PushBack(
+        ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address));
+}
+
+// Appends a conditional return node to the program; `kills` is stored in the node as-is.
+void ASTManager::InsertReturn(Expr condition, bool kills) {
+    program->nodes.PushBack(ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills));
+}
+
+// The decompile algorithm is based on
+// "Taming control flow: A structured approach to eliminating goto statements"
+// by AM Erosa, LJ Hendren 1994. In general, the idea is to get each goto onto the same
+// structured level as the label it jumps to. This is done through outward/inward movements
+// and lifting. Once both are on the same level, they can be enclosed in an "if" structure
+// (forward jump) or a "do-while" structure (backward jump).
+//
+// Gotos that were resolved are erased from `gotos`. Afterwards labels are either removed from
+// the tree (full decompile) or, when no remaining goto targets them, marked as unused.
+void ASTManager::Decompile() {
+    auto it = gotos.begin();
+    while (it != gotos.end()) {
+        const ASTNode goto_node = *it;
+        const auto label_index = goto_node->GetGotoLabel();
+        if (!label_index) {
+            return;
+        }
+        const ASTNode label = labels[*label_index];
+        if (!full_decompile) {
+            // We only decompile backward jumps
+            if (!IsBackwardsJump(goto_node, label)) {
+                it++;
+                continue;
+            }
+        }
+        if (IndirectlyRelated(goto_node, label)) {
+            while (!DirectlyRelated(goto_node, label)) {
+                MoveOutward(goto_node);
+            }
+        }
+        if (DirectlyRelated(goto_node, label)) {
+            u32 goto_level = goto_node->GetLevel();
+            const u32 label_level = label->GetLevel();
+            while (label_level < goto_level) {
+                MoveOutward(goto_node);
+                goto_level--;
+            }
+            // TODO(Blinkhawk): Implement Lifting and Inward Movements
+        }
+        if (label->GetParent() == goto_node->GetParent()) {
+            // Siblings: a label located before the goto means a backward jump, i.e. a loop.
+            bool is_loop = false;
+            ASTNode current = goto_node->GetPrevious();
+            while (current) {
+                if (current == label) {
+                    is_loop = true;
+                    break;
+                }
+                current = current->GetPrevious();
+            }
+
+            if (is_loop) {
+                EncloseDoWhile(goto_node, label);
+            } else {
+                EncloseIfThen(goto_node, label);
+            }
+            it = gotos.erase(it);
+            continue;
+        }
+        it++;
+    }
+
+    if (full_decompile) {
+        for (const ASTNode& label : labels) {
+            auto& manager = label->GetManager();
+            manager.Remove(label);
+        }
+        labels.clear();
+        return;
+    }
+
+    // Partial decompile: flag every label that no remaining goto refers to. Iterating with a
+    // range-for fixes the previous hand-written loop, which never advanced its iterator and
+    // therefore never terminated once `labels` was non-empty.
+    for (const ASTNode& label : labels) {
+        bool can_remove = true;
+        for (const ASTNode& goto_node : gotos) {
+            const auto label_index = goto_node->GetGotoLabel();
+            if (!label_index) {
+                return;
+            }
+            if (labels[*label_index] == label) {
+                can_remove = false;
+                break;
+            }
+        }
+        if (can_remove) {
+            label->MarkLabelUnused();
+        }
+    }
+}
+
+// Returns true when the label (or the ancestor of it that shares a parent with the goto's
+// ancestor) is located before the goto in their common sibling list.
+bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
+    u32 goto_depth = goto_node->GetLevel();
+    u32 label_depth = label_node->GetLevel();
+
+    // Lift the deeper node until both sit at the same depth.
+    for (; goto_depth > label_depth; --goto_depth) {
+        goto_node = goto_node->GetParent();
+    }
+    for (; label_depth > goto_depth; --label_depth) {
+        label_node = label_node->GetParent();
+    }
+
+    // Lift both in lockstep until they are siblings.
+    while (goto_node->GetParent() != label_node->GetParent()) {
+        goto_node = goto_node->GetParent();
+        label_node = label_node->GetParent();
+    }
+
+    // Walk backwards from the goto looking for the label.
+    for (ASTNode node = goto_node->GetPrevious(); node; node = node->GetPrevious()) {
+        if (node == label_node) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Two nodes are indirectly related when they are neither siblings nor directly related.
+bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
+    const bool are_siblings = first->GetParent() == second->GetParent();
+    return !are_siblings && !DirectlyRelated(first, second);
+}
+
+// Two non-sibling nodes are directly related when lifting the deeper one up to the depth of
+// the shallower one yields a sibling of the shallower one. Siblings are never directly
+// related.
+bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
+    if (first->GetParent() == second->GetParent()) {
+        return false;
+    }
+
+    const u32 first_level = first->GetLevel();
+    const u32 second_level = second->GetLevel();
+    const bool first_is_deeper = first_level > second_level;
+
+    const ASTNode& shallow = first_is_deeper ? second : first;
+    ASTNode deep = first_is_deeper ? first : second;
+    u32 steps = first_is_deeper ? first_level - second_level : second_level - first_level;
+
+    // Climb from the deeper node until both are at the same depth.
+    for (; steps > 0; --steps) {
+        deep = deep->GetParent();
+    }
+
+    return shallow->GetParent() == deep->GetParent();
+}
+
+// Debug helper: logs the printed tree under the caption `state`, then runs SanityCheck().
+void ASTManager::ShowCurrentState(std::string_view state) const {
+    const std::string tree_dump = Print();
+    LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, tree_dump);
+    SanityCheck();
+}
+
+// Logs a critical message for every registered label that has no parent.
+void ASTManager::SanityCheck() const {
+    for (const auto& label : labels) {
+        if (label->GetParent()) {
+            continue;
+        }
+        LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
+    }
+}
+
+// Resolves a backward goto whose label is an earlier sibling: the nodes between the label and
+// the goto become the body of a do-while that loops on the goto's condition. The goto itself
+// is removed.
+void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
+    ASTZipper& siblings = goto_node->GetManager();
+    const ASTNode body_begin = label->GetNext();
+    if (body_begin == goto_node) {
+        // Nothing between label and goto: the goto is simply dropped.
+        siblings.Remove(goto_node);
+        return;
+    }
+
+    const ASTNode enclosing = label->GetParent();
+    const Expr loop_condition = goto_node->GetGotoCondition();
+
+    // Move [body_begin, goto_node] into a new do-while node placed right after the label.
+    siblings.DetachSegment(body_begin, goto_node);
+    const ASTNode loop_node = ASTBase::Make<ASTDoWhile>(enclosing, loop_condition);
+    ASTZipper* const body = loop_node->GetSubNodes();
+    body->Init(body_begin, loop_node);
+    siblings.InsertAfter(loop_node, label);
+
+    // The goto travelled along as the last body node; it is no longer needed.
+    body->Remove(goto_node);
+}
+
+// Resolves a forward goto whose label is a later sibling: the nodes between the goto and the
+// label are wrapped in an "if" guarded by the negated goto condition. When else-derivation is
+// enabled and the goto directly follows an if-then with an equal condition, an "else" node is
+// created instead. The goto itself is removed.
+void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
+    ASTZipper& zipper = goto_node->GetManager();
+    const ASTNode if_end = label->GetPrevious();
+    if (if_end == goto_node) {
+        // Nothing between goto and label: the goto is simply dropped.
+        zipper.Remove(goto_node);
+        return;
+    }
+    const ASTNode prev = goto_node->GetPrevious();
+    const Expr condition = goto_node->GetGotoCondition();
+    bool do_else = false;
+    // `prev` is null when the goto is the first node of its list; guard the dereference
+    // (InsertAfter below already treats a null `prev` as "insert at the front").
+    if (!disable_else_derivation && prev && prev->IsIfThen()) {
+        const Expr if_condition = prev->GetIfCondition();
+        do_else = ExprAreEqual(if_condition, condition);
+    }
+    const ASTNode parent = label->GetParent();
+    zipper.DetachSegment(goto_node, if_end);
+    ASTNode if_node;
+    if (do_else) {
+        if_node = ASTBase::Make<ASTIfElse>(parent);
+    } else {
+        Expr neg_condition = MakeExprNot(condition);
+        if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
+    }
+    ASTZipper* sub_zipper = if_node->GetSubNodes();
+    sub_zipper->Init(goto_node, if_node);
+    zipper.InsertAfter(if_node, prev);
+    // The goto travelled along as the first body node; it is no longer needed.
+    sub_zipper->Remove(goto_node);
+}
+
+// Moves `goto_node` one structural level outward, out of its enclosing loop / if / else.
+// The goto condition is captured in a fresh flow variable that is initialised to false in the
+// outer list and set at the goto's old position:
+//  - loop parent: a break on the variable is placed at the old position;
+//  - if/else parent: the statements that followed the goto are wrapped in `if (!variable)`.
+// The goto, now conditioned on the variable, is re-inserted after the parent (or after the
+// parent's else branch, if it has one) as a child of the grandparent.
+void ASTManager::MoveOutward(ASTNode goto_node) {
+    ASTZipper& zipper = goto_node->GetManager();
+    const ASTNode parent = goto_node->GetParent();
+    ASTZipper& zipper2 = parent->GetManager();
+    const ASTNode grandpa = parent->GetParent();
+    const bool is_loop = parent->IsLoop();
+    const bool is_else = parent->IsIfElse();
+    const bool is_if = parent->IsIfThen();
+
+    const ASTNode prev = goto_node->GetPrevious();
+    const ASTNode post = goto_node->GetNext();
+
+    const Expr condition = goto_node->GetGotoCondition();
+    zipper.DetachSingle(goto_node);
+    if (is_loop) {
+        const u32 var_index = NewVariable();
+        const Expr var_condition = MakeExpr<ExprVar>(var_index);
+        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
+        // The initialiser lives in the grandparent's list (zipper2), so the grandparent is its
+        // parent; it was previously created with `parent`, leaving a wrong parent pointer.
+        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(grandpa, var_index, false_condition);
+        zipper2.InsertBefore(var_node_init, parent);
+        zipper.InsertAfter(var_node, prev);
+        goto_node->SetGotoCondition(var_condition);
+        const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
+        zipper.InsertAfter(break_node, var_node);
+    } else if (is_if || is_else) {
+        const u32 var_index = NewVariable();
+        const Expr var_condition = MakeExpr<ExprVar>(var_index);
+        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
+        // See above: the initialiser belongs to the grandparent's list.
+        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(grandpa, var_index, false_condition);
+        if (is_if) {
+            zipper2.InsertBefore(var_node_init, parent);
+        } else {
+            // For an else branch the initialiser goes in front of the node preceding it.
+            zipper2.InsertBefore(var_node_init, parent->GetPrevious());
+        }
+        zipper.InsertAfter(var_node, prev);
+        goto_node->SetGotoCondition(var_condition);
+        if (post) {
+            zipper.DetachTail(post);
+            const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
+            ASTZipper* sub_zipper = if_node->GetSubNodes();
+            sub_zipper->Init(post, if_node);
+            zipper.InsertAfter(if_node, var_node);
+        }
+    } else {
+        UNREACHABLE();
+    }
+    // An if-then followed by its else branch must stay adjacent, so the goto goes after the
+    // else; otherwise it goes right after the parent.
+    const ASTNode next = parent->GetNext();
+    const ASTNode anchor = (is_if && next && next->IsIfElse()) ? next : parent;
+    zipper2.InsertAfter(goto_node, anchor);
+    goto_node->SetParent(grandpa);
+}
+
+// Visitor that tears an AST down: every visited node has its next/previous/parent/manager
+// links cleared (ASTBase::Clear) so the shared_ptr cycles between nodes are broken, and
+// decoded blocks drop their node lists.
+class ASTClearer {
+public:
+    ASTClearer() = default;
+
+    void operator()(const ASTProgram& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()(const ASTIfThen& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()(const ASTIfElse& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
+
+    void operator()(ASTBlockDecoded& ast) {
+        ast.nodes.clear();
+    }
+
+    void operator()([[maybe_unused]] const ASTVarSet& ast) {}
+
+    void operator()([[maybe_unused]] const ASTLabel& ast) {}
+
+    void operator()([[maybe_unused]] const ASTGoto& ast) {}
+
+    void operator()(const ASTDoWhile& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()([[maybe_unused]] const ASTReturn& ast) {}
+
+    void operator()([[maybe_unused]] const ASTBreak& ast) {}
+
+    // Clears the node's subtree first, then the node's own links.
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+        node->Clear();
+    }
+
+private:
+    // Visits every node of a child list. The successor has to be fetched *before* visiting,
+    // because Visit() resets the node's `next` pointer; reading it afterwards ended the walk
+    // after the first child and left the remaining siblings linked to each other.
+    void ClearChildren(const ASTZipper& nodes) {
+        ASTNode current = nodes.GetFirst();
+        while (current) {
+            const ASTNode next = current->GetNext();
+            Visit(current);
+            current = next;
+        }
+    }
+};
+
+// Tears down the current tree (if any) and forgets all labels and pending gotos.
+// Does nothing when no tree exists.
+void ASTManager::Clear() {
+    if (main_node) {
+        // Break the links between nodes before dropping the root reference.
+        ASTClearer tree_clearer;
+        tree_clearer.Visit(main_node);
+        main_node.reset();
+        program = nullptr;
+
+        labels_map.clear();
+        labels.clear();
+        gotos.clear();
+    }
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
new file mode 100644
index 000000000..a2f0044ba
--- /dev/null
+++ b/src/video_core/shader/ast.h
@@ -0,0 +1,400 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <list>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "video_core/shader/expr.h"
+#include "video_core/shader/node.h"
+
+namespace VideoCommon::Shader {
+
+class ASTBase;
+class ASTBlockDecoded;
+class ASTBlockEncoded;
+class ASTBreak;
+class ASTDoWhile;
+class ASTGoto;
+class ASTIfElse;
+class ASTIfThen;
+class ASTLabel;
+class ASTProgram;
+class ASTReturn;
+class ASTVarSet;
+
+/// Payload of an AST node: exactly one of the node kinds forward-declared above.
+using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
+ ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
+
+/// Shared handle to an AST node.
+using ASTNode = std::shared_ptr<ASTBase>;
+
+/// Kinds of node containers.
+/// NOTE(review): not referenced anywhere in the visible part of this change - confirm usage.
+enum class ASTZipperType : u32 {
+ Program,
+ IfThen,
+ IfElse,
+ Loop,
+};
+
+/// Doubly linked list of sibling AST nodes. The links live in the nodes themselves
+/// (ASTBase::next / ASTBase::previous); the zipper only tracks both ends of the list.
+class ASTZipper final {
+public:
+ explicit ASTZipper();
+
+ /// Adopts the already linked chain starting at `first`, assigning `parent` to every node.
+ void Init(ASTNode first, ASTNode parent);
+
+ /// Returns the first node of the list (null when empty).
+ ASTNode GetFirst() const {
+ return first;
+ }
+
+ /// Returns the last node of the list (null when empty).
+ ASTNode GetLast() const {
+ return last;
+ }
+
+ /// Appends / prepends a node that does not belong to any zipper yet.
+ void PushBack(ASTNode new_node);
+ void PushFront(ASTNode new_node);
+ /// Inserts `new_node` next to `at_node`; a null `at_node` falls back to PushFront
+ /// (InsertAfter) or PushBack (InsertBefore).
+ void InsertAfter(ASTNode new_node, ASTNode at_node);
+ void InsertBefore(ASTNode new_node, ASTNode at_node);
+ /// Detach `node` and everything after it / a single node / the inclusive range
+ /// [start, end] from this list.
+ void DetachTail(ASTNode node);
+ void DetachSingle(ASTNode node);
+ void DetachSegment(ASTNode start, ASTNode end);
+ /// Unlinks `node` from the list; the node keeps its own next/previous pointers.
+ void Remove(ASTNode node);
+
+ ASTNode first{};
+ ASTNode last{};
+};
+
+/// Payload of the program node: holds the list of the program's child nodes.
+class ASTProgram {
+public:
+ ASTZipper nodes{};
+};
+
+/// Payload of an `if (condition) { nodes }` node.
+class ASTIfThen {
+public:
+    explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
+
+    Expr condition;
+    ASTZipper nodes{};
+};
+
+/// Payload of an `else { nodes }` node; it carries no condition of its own.
+class ASTIfElse {
+public:
+ ASTZipper nodes{};
+};
+
+/// Payload of a block node that has not been decoded yet: only its start and end are stored.
+class ASTBlockEncoded {
+public:
+    explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {}
+
+    u32 start;
+    u32 end;
+};
+
+/// Payload of a decoded block node: owns the block's list of nodes.
+class ASTBlockDecoded {
+public:
+    explicit ASTBlockDecoded(NodeBlock&& decoded_nodes) : nodes(std::move(decoded_nodes)) {}
+
+    NodeBlock nodes;
+};
+
+/// Payload of a `V<index> := condition` node.
+class ASTVarSet {
+public:
+    explicit ASTVarSet(u32 index_, Expr condition_)
+        : index{index_}, condition{std::move(condition_)} {}
+
+    u32 index;
+    Expr condition;
+};
+
+/// Payload of a label node; `unused` starts out false and is set via
+/// ASTBase::MarkLabelUnused().
+class ASTLabel {
+public:
+    explicit ASTLabel(u32 index_) : index{index_} {}
+
+    u32 index;
+    bool unused{};
+};
+
+/// Payload of a conditional goto node; `label` is the index of the target label.
+class ASTGoto {
+public:
+    explicit ASTGoto(Expr condition_, u32 label_)
+        : condition{std::move(condition_)}, label{label_} {}
+
+    Expr condition;
+    u32 label;
+};
+
+/// Payload of a `do { nodes } while (condition)` node.
+class ASTDoWhile {
+public:
+    explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
+
+    Expr condition;
+    ASTZipper nodes{};
+};
+
+/// Payload of a conditional return node; `kills` selects between the "discard" and "exit"
+/// flavours (see ASTPrinter).
+class ASTReturn {
+public:
+    explicit ASTReturn(Expr condition_, bool kills_)
+        : condition{std::move(condition_)}, kills{kills_} {}
+
+    Expr condition;
+    bool kills;
+};
+
+/// Payload of a conditional break node.
+class ASTBreak {
+public:
+    explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
+
+    Expr condition;
+};
+
+/// A node of the AST: a payload (`data`) plus the links to its parent, its siblings and the
+/// zipper (sibling list) that currently manages it.
+class ASTBase {
+public:
+    explicit ASTBase(ASTNode parent, ASTData data)
+        : data{std::move(data)}, parent{std::move(parent)} {}
+
+    /// Creates a node whose payload is a `U` constructed from `args`.
+    template <class U, class... Args>
+    static ASTNode Make(ASTNode parent, Args&&... args) {
+        return std::make_shared<ASTBase>(std::move(parent),
+                                         ASTData(U(std::forward<Args>(args)...)));
+    }
+
+    void SetParent(ASTNode new_parent) {
+        parent = std::move(new_parent);
+    }
+
+    ASTNode& GetParent() {
+        return parent;
+    }
+
+    const ASTNode& GetParent() const {
+        return parent;
+    }
+
+    /// Number of ancestors of this node (0 for a node without parent).
+    u32 GetLevel() const {
+        u32 depth = 0;
+        for (auto ancestor = parent; ancestor; ancestor = ancestor->GetParent()) {
+            ++depth;
+        }
+        return depth;
+    }
+
+    ASTData* GetInnerData() {
+        return &data;
+    }
+
+    const ASTData* GetInnerData() const {
+        return &data;
+    }
+
+    ASTNode GetNext() const {
+        return next;
+    }
+
+    ASTNode GetPrevious() const {
+        return previous;
+    }
+
+    /// The zipper managing this node. Must only be called while the node is in a list.
+    ASTZipper& GetManager() {
+        return *manager;
+    }
+
+    const ASTZipper& GetManager() const {
+        return *manager;
+    }
+
+    /// Target label index if this node is a goto, empty otherwise.
+    std::optional<u32> GetGotoLabel() const {
+        if (const auto* goto_data = std::get_if<ASTGoto>(&data)) {
+            return {goto_data->label};
+        }
+        return {};
+    }
+
+    /// Condition if this node is a goto, null otherwise.
+    Expr GetGotoCondition() const {
+        if (const auto* goto_data = std::get_if<ASTGoto>(&data)) {
+            return goto_data->condition;
+        }
+        return nullptr;
+    }
+
+    /// Flags a label node as unused; no effect on other node kinds.
+    void MarkLabelUnused() {
+        if (auto* label_data = std::get_if<ASTLabel>(&data)) {
+            label_data->unused = true;
+        }
+    }
+
+    /// Unused flag of a label node; nodes that are not labels report true.
+    bool IsLabelUnused() const {
+        if (const auto* label_data = std::get_if<ASTLabel>(&data)) {
+            return label_data->unused;
+        }
+        return true;
+    }
+
+    /// Index if this node is a label, empty otherwise.
+    std::optional<u32> GetLabelIndex() const {
+        if (const auto* label_data = std::get_if<ASTLabel>(&data)) {
+            return {label_data->index};
+        }
+        return {};
+    }
+
+    /// Condition if this node is an if-then, null otherwise.
+    Expr GetIfCondition() const {
+        if (const auto* if_data = std::get_if<ASTIfThen>(&data)) {
+            return if_data->condition;
+        }
+        return nullptr;
+    }
+
+    /// Replaces the condition of a goto node; no effect on other node kinds.
+    void SetGotoCondition(Expr new_condition) {
+        if (auto* goto_data = std::get_if<ASTGoto>(&data)) {
+            goto_data->condition = std::move(new_condition);
+        }
+    }
+
+    bool IsIfThen() const {
+        return std::holds_alternative<ASTIfThen>(data);
+    }
+
+    bool IsIfElse() const {
+        return std::holds_alternative<ASTIfElse>(data);
+    }
+
+    bool IsBlockEncoded() const {
+        return std::holds_alternative<ASTBlockEncoded>(data);
+    }
+
+    /// Replaces the payload with a decoded block that takes ownership of `nodes`.
+    void TransformBlockEncoded(NodeBlock&& nodes) {
+        data = ASTBlockDecoded(std::move(nodes));
+    }
+
+    bool IsLoop() const {
+        return std::holds_alternative<ASTDoWhile>(data);
+    }
+
+    /// Child list of a container node (program, if-then, else, do-while); null for leaves.
+    ASTZipper* GetSubNodes() {
+        if (auto* program_data = std::get_if<ASTProgram>(&data)) {
+            return &program_data->nodes;
+        }
+        if (auto* if_data = std::get_if<ASTIfThen>(&data)) {
+            return &if_data->nodes;
+        }
+        if (auto* else_data = std::get_if<ASTIfElse>(&data)) {
+            return &else_data->nodes;
+        }
+        if (auto* loop_data = std::get_if<ASTDoWhile>(&data)) {
+            return &loop_data->nodes;
+        }
+        return nullptr;
+    }
+
+    /// Drops every link this node holds to other nodes and to its zipper.
+    void Clear() {
+        next.reset();
+        previous.reset();
+        parent.reset();
+        manager = nullptr;
+    }
+
+private:
+    friend class ASTZipper;
+
+    ASTData data;
+    ASTNode parent{};
+    ASTNode next{};
+    ASTNode previous{};
+    ASTZipper* manager{};
+};
+
+/// Builds a flat program of labels, gotos, blocks and returns, and restructures it into
+/// nested if / do-while constructs (see Decompile()).
+class ASTManager final {
+public:
+    ASTManager(bool full_decompile, bool disable_else_derivation);
+    ~ASTManager();
+
+    ASTManager(const ASTManager& o) = delete;
+    ASTManager& operator=(const ASTManager& other) = delete;
+
+    ASTManager(ASTManager&& other) noexcept = default;
+    ASTManager& operator=(ASTManager&& other) noexcept = default;
+
+    /// Creates the program root. Must be called before any Insert* function.
+    void Init();
+
+    /// Registers `address` as a label target, giving it a label index.
+    void DeclareLabel(u32 address);
+
+    /// Appends the label node for `address` to the program.
+    void InsertLabel(u32 address);
+
+    /// Appends a conditional goto to the label of `address`.
+    void InsertGoto(Expr condition, u32 address);
+
+    /// Appends an encoded block with the given start and end addresses.
+    void InsertBlock(u32 start_address, u32 end_address);
+
+    /// Appends a conditional return.
+    void InsertReturn(Expr condition, bool kills);
+
+    /// Pretty-prints the current tree.
+    std::string Print() const;
+
+    /// Eliminates gotos by restructuring the tree.
+    void Decompile();
+
+    /// Debug helper: logs the tree under the caption `state`.
+    void ShowCurrentState(std::string_view state) const;
+
+    /// Debug helper: logs labels that have no parent.
+    void SanityCheck() const;
+
+    /// Destroys the tree and forgets all labels and gotos.
+    void Clear();
+
+    /// Full decompile: true when no goto is left. Otherwise: true when none of the remaining
+    /// gotos is a backward jump (a remaining node without goto label counts as failure).
+    bool IsFullyDecompiled() const {
+        if (full_decompile) {
+            return gotos.empty();
+        }
+
+        for (const ASTNode& pending : gotos) {
+            const std::optional<u32> target = pending->GetGotoLabel();
+            if (!target || IsBackwardsJump(pending, labels[*target])) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    ASTNode GetProgram() const {
+        return main_node;
+    }
+
+    /// Number of flow variables allocated so far.
+    u32 GetVariables() const {
+        return variables;
+    }
+
+    const std::vector<ASTNode>& GetLabels() const {
+        return labels;
+    }
+
+private:
+    bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
+
+    bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
+
+    bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
+
+    void EncloseDoWhile(ASTNode goto_node, ASTNode label);
+
+    void EncloseIfThen(ASTNode goto_node, ASTNode label);
+
+    void MoveOutward(ASTNode goto_node);
+
+    /// Hands out the next unused flow variable index.
+    u32 NewVariable() {
+        const u32 allocated = variables;
+        ++variables;
+        return allocated;
+    }
+
+    bool full_decompile{};
+    bool disable_else_derivation{};
+    std::unordered_map<u32, u32> labels_map{};
+    u32 labels_count{};
+    std::vector<ASTNode> labels{};
+    std::list<ASTNode> gotos{};
+    u32 variables{};
+    ASTProgram* program{};
+    ASTNode main_node{};
+    Expr false_condition{};
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
new file mode 100644
index 000000000..cddcbd4f0
--- /dev/null
+++ b/src/video_core/shader/compiler_settings.cpp
@@ -0,0 +1,26 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/shader/compiler_settings.h"
+
+namespace VideoCommon::Shader {
+
+// Returns a human readable name for a compile depth; values outside the enumeration map to
+// "Unknown Compiler Process".
+std::string CompileDepthAsString(const CompileDepth cd) {
+    const char* description = "Unknown Compiler Process";
+    switch (cd) {
+    case CompileDepth::BruteForce:
+        description = "Brute Force Compile";
+        break;
+    case CompileDepth::FlowStack:
+        description = "Simple Flow Stack Mode";
+        break;
+    case CompileDepth::NoFlowStack:
+        description = "Remove Flow Stack";
+        break;
+    case CompileDepth::DecompileBackwards:
+        description = "Decompile Backward Jumps";
+        break;
+    case CompileDepth::FullDecompile:
+        description = "Full Decompilation";
+        break;
+    default:
+        break;
+    }
+    return description;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
new file mode 100644
index 000000000..916018c01
--- /dev/null
+++ b/src/video_core/shader/compiler_settings.h
@@ -0,0 +1,26 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/engines/shader_bytecode.h"
+
+namespace VideoCommon::Shader {
+
+/// Selects how the shader's control flow is compiled. The display name of each value is
+/// produced by CompileDepthAsString().
+enum class CompileDepth : u32 {
+ BruteForce = 0, ///< "Brute Force Compile"
+ FlowStack = 1, ///< "Simple Flow Stack Mode"
+ NoFlowStack = 2, ///< "Remove Flow Stack"
+ DecompileBackwards = 3, ///< "Decompile Backward Jumps"
+ FullDecompile = 4, ///< "Full Decompilation"
+};
+
+std::string CompileDepthAsString(CompileDepth cd);
+
+/// Options for the shader compiler.
+struct CompilerSettings {
+ /// Requested compile depth; defaults to NoFlowStack.
+ CompileDepth depth{CompileDepth::NoFlowStack};
+ /// Defaults to true. NOTE(review): presumably forwarded to ASTManager's option of the same
+ /// name - confirm at the call site.
+ bool disable_else_derivation{true};
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
new file mode 100644
index 000000000..fe467608e
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -0,0 +1,110 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/const_buffer_locker.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Engines::SamplerDescriptor;
+
+// Creates a locker for `shader_stage`. No engine is passed to this overload, so `engine` is
+// not set here.
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
+ : stage{shader_stage} {}
+
+// Creates a locker for `shader_stage` that stores the address of `engine`; the Obtain*
+// functions query it for values that are not registered yet.
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine)
+ : stage{shader_stage}, engine{&engine} {}
+
+// Defaulted destructor, defined out of line.
+ConstBufferLocker::~ConstBufferLocker() = default;
+
+// Returns the registered value for (buffer, offset). An unregistered key is read from the
+// engine and registered; without an engine the result is empty.
+std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
+    const std::pair<u32, u32> key{buffer, offset};
+    if (const auto iter = keys.find(key); iter != keys.end()) {
+        return iter->second;
+    }
+    if (!engine) {
+        return std::nullopt;
+    }
+    const u32 fetched = engine->AccessConstBuffer32(stage, buffer, offset);
+    keys.emplace(key, fetched);
+    return fetched;
+}
+
+// Returns the registered bound sampler for `offset`. An unregistered offset is read from the
+// engine and registered; without an engine the result is empty.
+std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
+    if (const auto iter = bound_samplers.find(offset); iter != bound_samplers.end()) {
+        return iter->second;
+    }
+    if (!engine) {
+        return std::nullopt;
+    }
+    const SamplerDescriptor fetched = engine->AccessBoundSampler(stage, offset);
+    bound_samplers.emplace(offset, fetched);
+    return fetched;
+}
+
+// Returns the registered bindless sampler for (buffer, offset). An unregistered key is read
+// from the engine and registered; without an engine the result is empty.
+std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
+    u32 buffer, u32 offset) {
+    const std::pair<u32, u32> key{buffer, offset};
+    if (const auto iter = bindless_samplers.find(key); iter != bindless_samplers.end()) {
+        return iter->second;
+    }
+    if (!engine) {
+        return std::nullopt;
+    }
+    const SamplerDescriptor fetched = engine->AccessBindlessSampler(stage, buffer, offset);
+    bindless_samplers.emplace(key, fetched);
+    return fetched;
+}
+
+// Registers `value` for (buffer, offset), overwriting a previously registered value.
+void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
+    const std::pair<u32, u32> key{buffer, offset};
+    keys.insert_or_assign(key, value);
+}
+
+// Registers `sampler` for `offset`, overwriting a previously registered bound sampler.
+void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
+    const u32 key = offset;
+    bound_samplers.insert_or_assign(key, sampler);
+}
+
+// Registers `sampler` for (buffer, offset), overwriting a previously registered bindless
+// sampler.
+void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
+    const std::pair<u32, u32> key{buffer, offset};
+    bindless_samplers.insert_or_assign(key, sampler);
+}
+
+// Returns true when every registered key, bound sampler and bindless sampler still equals
+// what the engine reports for it. Without an engine the locker is never consistent.
+bool ConstBufferLocker::IsConsistent() const {
+    if (!engine) {
+        return false;
+    }
+
+    for (const auto& entry : keys) {
+        const auto [cbuf, offset] = entry.first;
+        const auto value = entry.second;
+        if (!(value == engine->AccessConstBuffer32(stage, cbuf, offset))) {
+            return false;
+        }
+    }
+
+    for (const auto& entry : bound_samplers) {
+        const auto [key, value] = entry;
+        if (!(value == engine->AccessBoundSampler(stage, key))) {
+            return false;
+        }
+    }
+
+    for (const auto& entry : bindless_samplers) {
+        const auto [cbuf, offset] = entry.first;
+        const auto value = entry.second;
+        if (!(value == engine->AccessBindlessSampler(stage, cbuf, offset))) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Returns true when both lockers hold exactly the same keys and samplers.
+bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
+    if (!(keys == rhs.keys)) {
+        return false;
+    }
+    if (!(bound_samplers == rhs.bound_samplers)) {
+        return false;
+    }
+    return bindless_samplers == rhs.bindless_samplers;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
new file mode 100644
index 000000000..600e2f3c3
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+#include "common/common_types.h"
+#include "common/hash.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+
+namespace VideoCommon::Shader {
+
+/// Maps a (const buffer index, offset) pair to the 32-bit value recorded for it.
+using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
+/// Maps a bound sampler offset to the sampler descriptor recorded for it.
+using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+/// Maps a (const buffer index, offset) pair to the bindless sampler descriptor recorded for it.
+using BindlessSamplerMap =
+ std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
+
+/**
+ * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader
+ * compiler. With it, the shader can obtain required data from GPU state and store it for disk
+ * shader compilation.
+ **/
+class ConstBufferLocker {
+public:
+ /// Creates a locker for the given shader stage with no engine supplied (`engine` defaults to
+ /// nullptr).
+ explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
+
+ /// Creates a locker for the given shader stage, taking the engine to query for values.
+ explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine);
+
+ ~ConstBufferLocker();
+
+ /// Retrieves a key from the locker. If it is registered, the registered value is returned;
+ /// if not, it is obtained from the engine and registered.
+ std::optional<u32> ObtainKey(u32 buffer, u32 offset);
+
+ /// Retrieves the bound sampler at the given offset.
+ /// NOTE(review): presumably follows the same lookup-then-query scheme as
+ /// ObtainBindlessSampler - confirm against the implementation.
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+
+ /// Retrieves the bindless sampler at (buffer, offset). A registered descriptor is returned
+ /// as is; otherwise it is read from the engine and registered. Returns std::nullopt when the
+ /// descriptor is not registered and there is no engine.
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
+
+ /// Inserts a key.
+ void InsertKey(u32 buffer, u32 offset, u32 value);
+
+ /// Inserts a bound sampler key.
+ void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Inserts a bindless sampler key.
+ void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Checks keys and samplers against the engine's current const buffers. Returns true if they
+ /// hold the same values, false otherwise (including when there is no engine).
+ bool IsConsistent() const;
+
+ /// Returns true if the keys and samplers are equal to the ones in the other locker.
+ bool HasEqualKeys(const ConstBufferLocker& rhs) const;
+
+ /// Gives a getter to the const buffer keys in the database.
+ const KeyMap& GetKeys() const {
+ return keys;
+ }
+
+ /// Gets samplers database.
+ const BoundSamplerMap& GetBoundSamplers() const {
+ return bound_samplers;
+ }
+
+ /// Gets bindless samplers database.
+ const BindlessSamplerMap& GetBindlessSamplers() const {
+ return bindless_samplers;
+ }
+
+private:
+ /// Shader stage passed to every engine query.
+ const Tegra::Engines::ShaderType stage;
+ /// Engine queried for values; nullptr when none was supplied.
+ Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
+ KeyMap keys;
+ BoundSamplerMap bound_samplers;
+ BindlessSamplerMap bindless_samplers;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index ec3a76690..b427ac873 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -4,18 +4,21 @@
#include <list>
#include <map>
+#include <set>
#include <stack>
#include <unordered_map>
-#include <unordered_set>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
+#include "video_core/shader/ast.h"
#include "video_core/shader/control_flow.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
+
namespace {
+
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -34,14 +37,20 @@ struct BlockStack {
std::stack<u32> pbk_stack{};
};
-struct BlockBranchInfo {
- Condition condition{};
- s32 address{exit_branch};
- bool kill{};
- bool is_sync{};
- bool is_brk{};
- bool ignore{};
-};
+/// Builds a shared BranchData holding a branch of type T constructed from `args`.
+/// T must be convertible to BranchData; this is enforced at compile time.
+template <typename T, typename... Args>
+BlockBranchInfo MakeBranchInfo(Args&&... args) {
+ static_assert(std::is_convertible_v<T, BranchData>);
+ return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
+}
+
+/// Returns the `ignore` flag when the branch is a SingleBranch; any other kind of branch is
+/// reported as not ignored.
+bool BlockBranchIsIgnored(BlockBranchInfo first) {
+    const auto* const single = std::get_if<SingleBranch>(first.get());
+    return single != nullptr && single->ignore;
+}
struct BlockInfo {
u32 start{};
@@ -55,21 +64,21 @@ struct BlockInfo {
};
struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
- const u32 start)
- : start{start}, program_code{program_code}, program_size{program_size} {}
+ explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
+ : program_code{program_code}, start{start}, locker{locker} {}
- u32 start{};
- std::vector<BlockInfo> block_info{};
- std::list<u32> inspect_queries{};
- std::list<Query> queries{};
- std::unordered_map<u32, u32> registered{};
- std::unordered_set<u32> labels{};
- std::map<u32, u32> ssy_labels{};
- std::map<u32, u32> pbk_labels{};
- std::unordered_map<u32, BlockStack> stacks{};
const ProgramCode& program_code;
- const std::size_t program_size;
+ ConstBufferLocker& locker;
+ u32 start{};
+ std::vector<BlockInfo> block_info;
+ std::list<u32> inspect_queries;
+ std::list<Query> queries;
+ std::unordered_map<u32, u32> registered;
+ std::set<u32> labels;
+ std::map<u32, u32> ssy_labels;
+ std::map<u32, u32> pbk_labels;
+ std::unordered_map<u32, BlockStack> stacks;
+ ASTManager* manager{};
};
enum class BlockCollision : u32 { None, Found, Inside };
@@ -102,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
}
Pred GetPredicate(u32 index, bool negated) {
- return static_cast<Pred>(index + (negated ? 8 : 0));
+ return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
}
/**
@@ -122,10 +131,122 @@ enum class ParseResult : u32 {
AbnormalFlow,
};
+/// Result of tracking an indirect branch (BRX) back to the const buffer table it reads from.
+struct BranchIndirectInfo {
+ u32 buffer{}; ///< Const buffer index holding the table.
+ u32 offset{}; ///< Offset of the first table entry inside that buffer.
+ u32 entries{}; ///< Number of table entries to read.
+ s32 relative_position{}; ///< Added to the BRX position to form the base of every target.
+};
+
+/// Const buffer location (index and offset) extracted from a tracked LD_C instruction.
+struct BufferInfo {
+ u32 index;
+ u32 offset;
+};
+
+/// Reads the instruction at `pos` and, when it is a BRX that does not use a constant buffer,
+/// returns its {branch extend, gpr8 register} pair.
+/// On success `pos` is moved to the preceding instruction so tracking can continue backwards.
+/// Returns std::nullopt (leaving `pos` untouched) when the instruction cannot be decoded, is not
+/// a BRX, or has a non-zero `constant_buffer` field.
+std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
+    const Instruction instr = state.program_code[pos];
+    const auto opcode = OpCode::Decode(instr);
+    // Decode may yield an empty optional (TrackInstruction checks for it too), so test it
+    // before dereferencing.
+    if (!opcode || opcode->get().GetId() != OpCode::Id::BRX) {
+        return std::nullopt;
+    }
+    if (instr.brx.constant_buffer != 0) {
+        return std::nullopt;
+    }
+    --pos;
+    return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
+}
+
+/// Walks backwards from `pos` towards `state.start`, skipping scheduling instructions and
+/// instructions that cannot be decoded, until `test(instr, opcode)` accepts one.
+/// On a match, `pos` is moved to the instruction preceding the match and the value produced by
+/// `pack(instr, opcode)` is returned. Returns std::nullopt when no instruction matches.
+template <typename Result, typename TestCallable, typename PackCallable>
+// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
+// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
+std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
+                                       PackCallable pack) {
+    // `pos` is unsigned: decrementing it past 0 wraps to a huge value that still satisfies
+    // `pos >= state.start`. Bounding it by the code size stops the walk instead of reading
+    // out of range, both inside this loop and when a previous step left `pos` wrapped.
+    const auto code_size = state.program_code.size();
+    for (; pos >= state.start && pos < code_size; --pos) {
+        if (IsSchedInstruction(pos, state.start)) {
+            continue;
+        }
+        const Instruction instr = state.program_code[pos];
+        const auto opcode = OpCode::Decode(instr);
+        if (!opcode) {
+            continue;
+        }
+        if (test(instr, opcode->get())) {
+            --pos;
+            return std::make_optional(pack(instr, opcode->get()));
+        }
+    }
+    return std::nullopt;
+}
+
+/// Searches backwards from `pos` for the LD_C instruction (uniform type Single) whose gpr0 is
+/// `brx_tracked_register`. Returns the const buffer location it reads (cbuf36 index and offset)
+/// together with its gpr8 register, or std::nullopt when no such instruction is found.
+std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
+ u64 brx_tracked_register) {
+ return TrackInstruction<std::pair<BufferInfo, u64>>(
+ state, pos,
+ [brx_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::LD_C &&
+ instr.gpr0.Value() == brx_tracked_register &&
+ instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
+ },
+ [](auto instr, const auto& opcode) {
+ const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
+ static_cast<u32>(instr.cbuf36.GetOffset())};
+ return std::make_pair(info, instr.gpr8.Value());
+ });
+}
+
+/// Searches backwards from `pos` for the SHL_IMM instruction whose gpr0 is
+/// `ldc_tracked_register` and returns its gpr8 register, or std::nullopt when none is found.
+std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
+ u64 ldc_tracked_register) {
+ return TrackInstruction<u64>(state, pos,
+ [ldc_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::SHL_IMM &&
+ instr.gpr0.Value() == ldc_tracked_register;
+ },
+ [](auto instr, const auto&) { return instr.gpr8.Value(); });
+}
+
+/// Searches backwards from `pos` for the IMNMX_IMM instruction whose gpr0 is
+/// `shl_tracked_register` and returns its signed 20-bit immediate plus one, or std::nullopt when
+/// none is found. The caller uses the result as the number of branch table entries.
+/// NOTE(review): the +1 presumably turns an inclusive maximum index into a count - confirm.
+std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
+ u64 shl_tracked_register) {
+ return TrackInstruction<u32>(state, pos,
+ [shl_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+ instr.gpr0.Value() == shl_tracked_register;
+ },
+ [](auto instr, const auto&) {
+ return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+ });
+}
+
+/// Reconstructs the branch table used by the BRX instruction at `pos`.
+/// Starting at the BRX, it walks backwards through the chain BRX <- LD_C <- SHL_IMM <- IMNMX_IMM,
+/// each step following the register produced by the previous one and resuming the search where
+/// the previous step stopped. Returns std::nullopt as soon as one link of the chain is missing.
+std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
+ const auto brx_info = GetBRXInfo(state, pos);
+ if (!brx_info) {
+ return std::nullopt;
+ }
+ const auto [relative_position, brx_tracked_register] = *brx_info;
+
+ // The LD_C that loads the branch target gives the const buffer location of the table.
+ const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
+ if (!ldc_info) {
+ return std::nullopt;
+ }
+ const auto [buffer_info, ldc_tracked_register] = *ldc_info;
+
+ const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
+ if (!shl_tracked_register) {
+ return std::nullopt;
+ }
+
+ // The IMNMX immediate yields the number of table entries.
+ const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
+ if (!entries) {
+ return std::nullopt;
+ }
+
+ return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
+}
+
std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
u32 offset = static_cast<u32>(address);
- const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
+ const u32 end_address = static_cast<u32>(state.program_code.size());
ParseInfo parse_info{};
+ SingleBranch single_branch{};
const auto insert_label = [](CFGRebuildState& state, u32 address) {
const auto pair = state.labels.emplace(address);
@@ -138,13 +259,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
if (offset >= end_address) {
// ASSERT_OR_EXECUTE can't be used, as it ignores the break
ASSERT_MSG(false, "Shader passed the current limit!");
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.ignore = false;
+
+ single_branch.address = exit_branch;
+ single_branch.ignore = false;
break;
}
if (state.registered.count(offset) != 0) {
- parse_info.branch_info.address = offset;
- parse_info.branch_info.ignore = true;
+ single_branch.address = offset;
+ single_branch.ignore = true;
break;
}
if (IsSchedInstruction(offset, state.start)) {
@@ -161,24 +283,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = exit_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
@@ -187,99 +311,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
return {ParseResult::AbnormalFlow, parse_info};
}
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
const u32 branch_offset = offset + instr.bra.GetBranchTarget();
if (branch_offset == 0) {
- parse_info.branch_info.address = exit_branch;
+ single_branch.address = exit_branch;
} else {
- parse_info.branch_info.address = branch_offset;
+ single_branch.address = branch_offset;
}
insert_label(state, branch_offset);
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::SYNC: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = unassigned_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = true;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = unassigned_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = true;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::BRK: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = unassigned_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = true;
- parse_info.branch_info.ignore = false;
+ single_branch.address = unassigned_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = true;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::KIL: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.kill = true;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = exit_branch;
+ single_branch.kill = true;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
@@ -296,7 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
break;
}
case OpCode::Id::BRX: {
- return {ParseResult::AbnormalFlow, parse_info};
+ const auto tmp = TrackBranchIndirectInfo(state, offset);
+ if (!tmp) {
+ LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
+ return {ParseResult::AbnormalFlow, parse_info};
+ }
+
+ const auto result = *tmp;
+ const s32 pc_target = offset + result.relative_position;
+ std::vector<CaseBranch> branches;
+ for (u32 i = 0; i < result.entries; i++) {
+ auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ if (!key) {
+ return {ParseResult::AbnormalFlow, parse_info};
+ }
+ u32 value = *key;
+ u32 target = static_cast<u32>((value >> 3) + pc_target);
+ insert_label(state, target);
+ branches.emplace_back(value, target);
+ }
+ parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<MultiBranch>(
+ static_cast<u32>(instr.gpr8.Value()), std::move(branches));
+
+ return {ParseResult::ControlCaught, parse_info};
}
default:
break;
@@ -304,10 +459,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
offset++;
}
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
parse_info.end_address = offset - 1;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
+ single_branch.is_brk, single_branch.ignore);
return {ParseResult::BlockEnd, parse_info};
}
@@ -331,9 +489,10 @@ bool TryInspectAddress(CFGRebuildState& state) {
BlockInfo& current_block = state.block_info[block_index];
current_block.end = address - 1;
new_block.branch = current_block.branch;
- BlockBranchInfo forward_branch{};
- forward_branch.address = address;
- forward_branch.ignore = true;
+ BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
+ const auto branch = std::get_if<SingleBranch>(forward_branch.get());
+ branch->address = address;
+ branch->ignore = true;
current_block.branch = forward_branch;
return true;
}
@@ -348,12 +507,15 @@ bool TryInspectAddress(CFGRebuildState& state) {
BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
block_info.branch = parse_info.branch_info;
- if (parse_info.branch_info.condition.IsUnconditional()) {
+ if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
+ const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
+ if (branch->condition.IsUnconditional()) {
+ return true;
+ }
+ const u32 fallthrough_address = parse_info.end_address + 1;
+ state.inspect_queries.push_front(fallthrough_address);
return true;
}
-
- const u32 fallthrough_address = parse_info.end_address + 1;
- state.inspect_queries.push_front(fallthrough_address);
return true;
}
@@ -391,91 +553,205 @@ bool TryQuery(CFGRebuildState& state) {
state.queries.pop_front();
gather_labels(q2.ssy_stack, state.ssy_labels, block);
gather_labels(q2.pbk_stack, state.pbk_labels, block);
- if (!block.branch.condition.IsUnconditional()) {
- q2.address = block.end + 1;
- state.queries.push_back(q2);
- }
+ if (std::holds_alternative<SingleBranch>(*block.branch)) {
+ const auto branch = std::get_if<SingleBranch>(block.branch.get());
+ if (!branch->condition.IsUnconditional()) {
+ q2.address = block.end + 1;
+ state.queries.push_back(q2);
+ }
- Query conditional_query{q2};
- if (block.branch.is_sync) {
- if (block.branch.address == unassigned_branch) {
- block.branch.address = conditional_query.ssy_stack.top();
+ Query conditional_query{q2};
+ if (branch->is_sync) {
+ if (branch->address == unassigned_branch) {
+ branch->address = conditional_query.ssy_stack.top();
+ }
+ conditional_query.ssy_stack.pop();
}
- conditional_query.ssy_stack.pop();
- }
- if (block.branch.is_brk) {
- if (block.branch.address == unassigned_branch) {
- block.branch.address = conditional_query.pbk_stack.top();
+ if (branch->is_brk) {
+ if (branch->address == unassigned_branch) {
+ branch->address = conditional_query.pbk_stack.top();
+ }
+ conditional_query.pbk_stack.pop();
}
- conditional_query.pbk_stack.pop();
+ conditional_query.address = branch->address;
+ state.queries.push_back(std::move(conditional_query));
+ return true;
+ }
+ const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+ for (const auto& branch_case : multi_branch->branches) {
+ Query conditional_query{q2};
+ conditional_query.address = branch_case.address;
+ state.queries.push_back(std::move(conditional_query));
}
- conditional_query.address = block.branch.address;
- state.queries.push_back(std::move(conditional_query));
return true;
}
+
} // Anonymous namespace
-std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
- std::size_t program_size, u32 start_address) {
- CFGRebuildState state{program_code, program_size, start_address};
+/// Emits the AST statement(s) for a block's terminating branch.
+/// A SingleBranch with a negative address becomes a conditional return (flagged as a kill when
+/// `kill` is set); any other SingleBranch becomes a conditional goto. A MultiBranch becomes one
+/// goto per case, each guarded by a "gpr == cmp_value" expression.
+void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
+    // Builds the boolean expression for a branch condition: the predicate (negated for indices
+    // above 7) and-ed with the condition code, or constant true when neither applies.
+    const auto make_condition = [](const Condition& cond) -> Expr {
+        Expr cc_expr{};
+        if (cond.cc != ConditionCode::T) {
+            cc_expr = MakeExpr<ExprCondCode>(cond.cc);
+        }
+        if (cond.predicate == Pred::UnusedIndex) {
+            if (cc_expr) {
+                return cc_expr;
+            }
+            return MakeExpr<ExprBoolean>(true);
+        }
+        u32 pred_index = static_cast<u32>(cond.predicate);
+        const bool negated = pred_index > 7;
+        if (negated) {
+            pred_index -= 8;
+        }
+        Expr pred_expr = MakeExpr<ExprPredicate>(pred_index);
+        if (negated) {
+            pred_expr = MakeExpr<ExprNot>(pred_expr);
+        }
+        if (cc_expr) {
+            return MakeExpr<ExprAnd>(pred_expr, cc_expr);
+        }
+        return pred_expr;
+    };
+
+    if (std::holds_alternative<SingleBranch>(*branch_info)) {
+        const auto single = std::get_if<SingleBranch>(branch_info.get());
+        if (single->address < 0) {
+            mm.InsertReturn(make_condition(single->condition), single->kill);
+        } else {
+            mm.InsertGoto(make_condition(single->condition), single->address);
+        }
+        return;
+    }
+
+    const auto multi = std::get_if<MultiBranch>(branch_info.get());
+    for (const auto& entry : multi->branches) {
+        mm.InsertGoto(MakeExpr<ExprGprEqual>(multi->gpr, entry.cmp_value), entry.address);
+    }
+}
+
+/// Feeds the rebuilt control flow graph into the AST manager and runs its decompilation.
+/// `state.manager` is dereferenced unconditionally, so it must be set before calling.
+void DecompileShader(CFGRebuildState& state) {
+ state.manager->Init();
+ // Declare every label up front, before any block is inserted.
+ for (auto label : state.labels) {
+ state.manager->DeclareLabel(label);
+ }
+ for (auto& block : state.block_info) {
+ if (state.labels.count(block.start) != 0) {
+ state.manager->InsertLabel(block.start);
+ }
+ // A block whose branch is ignored is inserted with its end extended by one and without
+ // a branch statement.
+ const bool ignore = BlockBranchIsIgnored(block.branch);
+ u32 end = ignore ? block.end + 1 : block.end;
+ state.manager->InsertBlock(block.start, end);
+ if (!ignore) {
+ InsertBranch(*state.manager, block.branch);
+ }
+ }
+ state.manager->Decompile();
+}
+
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
+ const CompilerSettings& settings,
+ ConstBufferLocker& locker) {
+ auto result_out = std::make_unique<ShaderCharacteristics>();
+ if (settings.depth == CompileDepth::BruteForce) {
+ result_out->settings.depth = CompileDepth::BruteForce;
+ return result_out;
+ }
+ CFGRebuildState state{program_code, start_address, locker};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
state.inspect_queries.push_back(state.start);
while (!state.inspect_queries.empty()) {
if (!TryInspectAddress(state)) {
- return {};
+ result_out->settings.depth = CompileDepth::BruteForce;
+ return result_out;
}
}
- // Decompile Stacks
- state.queries.push_back(Query{state.start, {}, {}});
- bool decompiled = true;
- while (!state.queries.empty()) {
- if (!TryQuery(state)) {
- decompiled = false;
- break;
+ bool use_flow_stack = true;
+
+ bool decompiled = false;
+
+ if (settings.depth != CompileDepth::FlowStack) {
+ // Decompile Stacks
+ state.queries.push_back(Query{state.start, {}, {}});
+ decompiled = true;
+ while (!state.queries.empty()) {
+ if (!TryQuery(state)) {
+ decompiled = false;
+ break;
+ }
}
}
+ use_flow_stack = !decompiled;
+
// Sort and organize results
std::sort(state.block_info.begin(), state.block_info.end(),
- [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; });
- ShaderCharacteristics result_out{};
- result_out.decompilable = decompiled;
- result_out.start = start_address;
- result_out.end = start_address;
- for (const auto& block : state.block_info) {
+ [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
+ if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
+ ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
+ settings.disable_else_derivation};
+ state.manager = &manager;
+ DecompileShader(state);
+ decompiled = state.manager->IsFullyDecompiled();
+ if (!decompiled) {
+ if (settings.depth == CompileDepth::FullDecompile) {
+ LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
+ } else {
+ LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
+ }
+ state.manager->ShowCurrentState("Of Shader");
+ state.manager->Clear();
+ } else {
+ auto characteristics = std::make_unique<ShaderCharacteristics>();
+ characteristics->start = start_address;
+ characteristics->settings.depth = settings.depth;
+ characteristics->manager = std::move(manager);
+ characteristics->end = state.block_info.back().end + 1;
+ return characteristics;
+ }
+ }
+
+ result_out->start = start_address;
+ result_out->settings.depth =
+ use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
+ result_out->blocks.clear();
+ for (auto& block : state.block_info) {
ShaderBlock new_block{};
new_block.start = block.start;
new_block.end = block.end;
- new_block.ignore_branch = block.branch.ignore;
+ new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
if (!new_block.ignore_branch) {
- new_block.branch.cond = block.branch.condition;
- new_block.branch.kills = block.branch.kill;
- new_block.branch.address = block.branch.address;
+ new_block.branch = block.branch;
}
- result_out.end = std::max(result_out.end, block.end);
- result_out.blocks.push_back(new_block);
+ result_out->end = std::max(result_out->end, block.end);
+ result_out->blocks.push_back(new_block);
}
- if (result_out.decompilable) {
- result_out.labels = std::move(state.labels);
- return {std::move(result_out)};
+ if (!use_flow_stack) {
+ result_out->labels = std::move(state.labels);
+ return result_out;
}
- // If it's not decompilable, merge the unlabelled blocks together
- auto back = result_out.blocks.begin();
+ auto back = result_out->blocks.begin();
auto next = std::next(back);
- while (next != result_out.blocks.end()) {
+ while (next != result_out->blocks.end()) {
if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
back->end = next->end;
- next = result_out.blocks.erase(next);
+ next = result_out->blocks.erase(next);
continue;
}
back = next;
++next;
}
- return {std::move(result_out)};
+
+ return result_out;
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index b0a5e4f8c..5304998b9 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -6,9 +6,12 @@
#include <list>
#include <optional>
-#include <unordered_set>
+#include <set>
+#include <variant>
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -35,29 +38,61 @@ struct Condition {
}
};
-struct ShaderBlock {
- struct Branch {
- Condition cond{};
- bool kills{};
- s32 address{};
+/// A branch with a single destination, guarded by a condition.
+class SingleBranch {
+public:
+ SingleBranch() = default;
+ SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
+ bool ignore)
+ : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
+ ignore{ignore} {}
+
+ /// Member-wise comparison of all six fields.
+ bool operator==(const SingleBranch& b) const {
+ return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
+ std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
+ }
+
+ bool operator!=(const SingleBranch& b) const {
+ return !operator==(b);
+ }
+
+ Condition condition{}; ///< Condition under which the branch is taken.
+ s32 address{exit_branch}; ///< Destination; defaults to exit_branch.
+ bool kill{}; ///< Set for KIL instructions.
+ bool is_sync{}; ///< Set for SYNC instructions.
+ bool is_brk{}; ///< Set for BRK instructions.
+ bool ignore{}; ///< When set, the block is treated as having no branch to emit.
+};
- bool operator==(const Branch& b) const {
- return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
- }
+/// One case of a MultiBranch: the value the branch register is compared against and the address
+/// jumped to when it matches.
+struct CaseBranch {
+ CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
+ u32 cmp_value;
+ u32 address;
+};
+
+/// A branch with several possible destinations, selected by comparing register `gpr` against
+/// the `cmp_value` of each case.
+class MultiBranch {
+public:
+ /// Takes ownership of `branches` by moving from it.
+ MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
+ : gpr{gpr}, branches{std::move(branches)} {}
+
+ u32 gpr{};
+ std::vector<CaseBranch> branches{};
+};
+
+using BranchData = std::variant<SingleBranch, MultiBranch>;
+using BlockBranchInfo = std::shared_ptr<BranchData>;
- bool operator!=(const Branch& b) const {
- return !operator==(b);
- }
- };
+bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
+struct ShaderBlock {
u32 start{};
u32 end{};
bool ignore_branch{};
- Branch branch{};
+ BlockBranchInfo branch{};
bool operator==(const ShaderBlock& sb) const {
- return std::tie(start, end, ignore_branch, branch) ==
- std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
+ return std::tie(start, end, ignore_branch) ==
+ std::tie(sb.start, sb.end, sb.ignore_branch) &&
+ BlockBranchInfoAreEqual(branch, sb.branch);
}
bool operator!=(const ShaderBlock& sb) const {
@@ -67,13 +102,15 @@ struct ShaderBlock {
struct ShaderCharacteristics {
std::list<ShaderBlock> blocks{};
- bool decompilable{};
+ std::set<u32> labels{};
u32 start{};
u32 end{};
- std::unordered_set<u32> labels{};
+ ASTManager manager{true, true};
+ CompilerSettings settings{};
};
-std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
- std::size_t program_size, u32 start_address);
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
+ const CompilerSettings& settings,
+ ConstBufferLocker& locker);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 47a9fd961..22c3e5120 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,60 +33,140 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
-} // namespace
+} // Anonymous namespace
+
+/// Visitor that walks a shader AST and decodes every encoded block it reaches.
+///
+/// Container nodes (program, if-then, if-else, do-while) forward to each of their children in
+/// list order; every other node kind is a no-op for std::visit. Decoding happens in Visit().
+class ASTDecoder {
+public:
+    /// @param ir Shader IR used to decode instruction ranges. Only a reference is stored, so it
+    ///           must outlive this decoder.
+    explicit ASTDecoder(ShaderIR& ir) : ir(ir) {}
+
+    void operator()(ASTProgram& ast) {
+        VisitChildren(ast);
+    }
+
+    void operator()(ASTIfThen& ast) {
+        VisitChildren(ast);
+    }
+
+    void operator()(ASTIfElse& ast) {
+        VisitChildren(ast);
+    }
+
+    void operator()(ASTDoWhile& ast) {
+        VisitChildren(ast);
+    }
+
+    // Leaf nodes carry no children to traverse; encoded blocks are handled in Visit().
+    void operator()(ASTBlockEncoded&) {}
+
+    void operator()(ASTBlockDecoded&) {}
+
+    void operator()(ASTVarSet&) {}
+
+    void operator()(ASTLabel&) {}
+
+    void operator()(ASTGoto&) {}
+
+    void operator()(ASTReturn&) {}
+
+    void operator()(ASTBreak&) {}
+
+    /// Dispatches this visitor on the node's inner data and then, if the node is an encoded
+    /// block, calls ir.DecodeRange(block->start, block->end) and hands the resulting NodeBlock
+    /// to the node through TransformBlockEncoded.
+    void Visit(ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+        if (node->IsBlockEncoded()) {
+            auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
+            NodeBlock bb = ir.DecodeRange(block->start, block->end);
+            node->TransformBlockEncoded(std::move(bb));
+        }
+    }
+
+private:
+    /// Visits every node of ast.nodes, starting at GetFirst() and following GetNext() until an
+    /// empty node is returned.
+    template <typename Container>
+    void VisitChildren(Container& ast) {
+        for (ASTNode current = ast.nodes.GetFirst(); current; current = current->GetNext()) {
+            Visit(current);
+        }
+    }
+
+    ShaderIR& ir;
+};
void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
- disable_flow_stack = false;
- const auto info = ScanFlow(program_code, program_size, main_offset);
- if (info) {
- const auto& shader_info = *info;
- coverage_begin = shader_info.start;
- coverage_end = shader_info.end;
- if (shader_info.decompilable) {
- disable_flow_stack = true;
- const auto insert_block = [this](NodeBlock& nodes, u32 label) {
- if (label == static_cast<u32>(exit_branch)) {
- return;
- }
- basic_blocks.insert({label, nodes});
- };
- const auto& blocks = shader_info.blocks;
- NodeBlock current_block;
- u32 current_label = static_cast<u32>(exit_branch);
- for (auto& block : blocks) {
- if (shader_info.labels.count(block.start) != 0) {
- insert_block(current_block, current_label);
- current_block.clear();
- current_label = block.start;
- }
- if (!block.ignore_branch) {
- DecodeRangeInner(current_block, block.start, block.end);
- InsertControlFlow(current_block, block);
- } else {
- DecodeRangeInner(current_block, block.start, block.end + 1);
- }
- }
- insert_block(current_block, current_label);
- return;
- }
- LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
- // we can't decompile it, fallback to standard method
+ decompiled = false;
+ auto info = ScanFlow(program_code, main_offset, settings, locker); // Scan control flow first.
+ auto& shader_info = *info; // The scan result is dereferenced without a null check.
+ coverage_begin = shader_info.start;
+ coverage_end = shader_info.end;
+ switch (shader_info.settings.depth) { // Depth reported by the scan picks the decode strategy.
+ case CompileDepth::FlowStack: { // Each scanned block becomes its own basic block.
for (const auto& block : shader_info.blocks) {
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
}
- return;
+ break;
}
- LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
-
- // Now we need to deal with an undecompilable shader. We need to brute force
- // a shader that captures every position.
- coverage_begin = main_offset;
- const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
- coverage_end = shader_end;
- for (u32 label = main_offset; label < shader_end; label++) {
- basic_blocks.insert({label, DecodeRange(label, label + 1)});
+ case CompileDepth::NoFlowStack: { // Blocks are merged until the next labelled block starts.
+ disable_flow_stack = true;
+ const auto insert_block = [this](NodeBlock& nodes, u32 label) {
+ if (label == static_cast<u32>(exit_branch)) { // Nodes gathered before any label are dropped.
+ return;
+ }
+ basic_blocks.insert({label, nodes});
+ };
+ const auto& blocks = shader_info.blocks;
+ NodeBlock current_block;
+ u32 current_label = static_cast<u32>(exit_branch);
+ for (auto& block : blocks) {
+ if (shader_info.labels.count(block.start) != 0) { // A labelled block flushes the merged run.
+ insert_block(current_block, current_label);
+ current_block.clear();
+ current_label = block.start;
+ }
+ if (!block.ignore_branch) {
+ DecodeRangeInner(current_block, block.start, block.end);
+ InsertControlFlow(current_block, block);
+ } else {
+ DecodeRangeInner(current_block, block.start, block.end + 1);
+ }
+ }
+ insert_block(current_block, current_label); // Flush the final merged run.
+ break;
+ }
+ case CompileDepth::DecompileBackwards:
+ case CompileDepth::FullDecompile: { // Decode the encoded blocks held by the scan's AST.
+ program_manager = std::move(shader_info.manager);
+ disable_flow_stack = true;
+ decompiled = true;
+ ASTDecoder decoder{*this};
+ ASTNode program = GetASTProgram();
+ decoder.Visit(program);
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
+ [[fallthrough]];
+ case CompileDepth::BruteForce: { // One basic block per position, main_offset to program end.
+ const auto shader_end = static_cast<u32>(program_code.size());
+ coverage_begin = main_offset;
+ coverage_end = shader_end;
+ for (u32 label = main_offset; label < shader_end; ++label) {
+ basic_blocks.insert({label, DecodeRange(label, label + 1)});
+ }
+ break;
+ }
+ }
+ if (settings.depth != shader_info.settings.depth) { // Requested depth differs from the scan's.
+ LOG_WARNING(
+ HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
+ CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
}
}
@@ -118,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
}
return result;
};
- if (block.branch.address < 0) {
- if (block.branch.kills) {
- Node n = Operation(OperationCode::Discard);
- n = apply_conditions(block.branch.cond, n);
+ if (std::holds_alternative<SingleBranch>(*block.branch)) {
+ auto branch = std::get_if<SingleBranch>(block.branch.get());
+ if (branch->address < 0) {
+ if (branch->kill) {
+ Node n = Operation(OperationCode::Discard);
+ n = apply_conditions(branch->condition, n);
+ bb.push_back(n);
+ global_code.push_back(n);
+ return;
+ }
+ Node n = Operation(OperationCode::Exit);
+ n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
- Node n = Operation(OperationCode::Exit);
- n = apply_conditions(block.branch.cond, n);
+ Node n = Operation(OperationCode::Branch, Immediate(branch->address));
+ n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
- Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
- n = apply_conditions(block.branch.cond, n);
- bb.push_back(n);
- global_code.push_back(n);
+ auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+ Node op_a = GetRegister(multi_branch->gpr);
+ for (auto& branch_case : multi_branch->branches) {
+ Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
+ Node op_b = Immediate(branch_case.cmp_value);
+ Node condition =
+ GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
+ auto result = Conditional(condition, {n});
+ bb.push_back(result);
+ global_code.push_back(result);
+ }
}
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 1473c282a..fcedd2af6 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
if (instr.fmul.tab5cb8_2 != 0) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
- instr.fmul.tab5cb8_2.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+ instr.fmul.tab5cb8_2.Value());
}
if (instr.fmul.tab5c68_0 != 1) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
- instr.fmul.tab5c68_0.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
+ instr.fmul.tab5c68_0.Value());
}
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
@@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
+ LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
+
// Currently RRO is only implemented as a register move.
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
SetRegister(bb, instr.gpr0, op_b);
- LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default:
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index b06cbe441..ee7d9a29d 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
opcode->get().GetId() == OpCode::Id::HADD2_R) {
- if (instr.alu_half.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 6466fc011..d179b9873 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
- if (instr.alu_half_imm.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
} else {
- if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index b73f6536e..a33d242e9 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
case OpCode::Id::ICMP_IMM: {
const Node zero = Immediate(0);
- const auto [op_b, test] = [&]() -> std::pair<Node, Node> {
+ const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::ICMP_CR:
return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
@@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
return {zero, zero};
}
}();
- const Node op_a = GetRegister(instr.gpr8);
+ const Node op_lhs = GetRegister(instr.gpr8);
const Node comparison =
GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
- SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b));
+ SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
break;
}
case OpCode::Id::LOP_C:
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index ca2f39e8d..5973588d6 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
if (instr.ffma.tab5980_0 != 1) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
}
if (instr.ffma.tab5980_1 != 0) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
}
const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 48ca7a4af..848e46874 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.hset2.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index 840694527..310655619 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
@@ -18,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- DEBUG_ASSERT(instr.hsetp2.ftz == 0);
+ if (instr.hsetp2.ftz != 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
@@ -32,6 +35,8 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
h_and = instr.hsetp2.cbuf_and_imm.h_and;
op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
+ // F32 is hardcoded in hardware
+ op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
break;
case OpCode::Id::HSETP2_IMM:
cond = instr.hsetp2.cbuf_and_imm.cond;
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 95ec1cdd9..d2fe4ec5d 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -143,39 +143,37 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
}
Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
- const auto offset{static_cast<std::size_t>(image.index.Value())};
- if (const auto image = TryUseExistingImage(offset, type)) {
- return *image;
+ const auto offset = static_cast<u32>(image.index.Value()); // Bound images are keyed by index.
+
+ const auto it = // Reuse the entry already registered for this offset, if any.
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [offset](const Image& entry) { return entry.GetOffset() == offset; });
+ if (it != std::end(used_images)) {
+ ASSERT(!it->IsBindless() && it->GetType() == type); // Must match the requested type.
+ return *it;
}
- const std::size_t next_index{used_images.size()};
- return used_images.emplace(offset, Image{offset, next_index, type}).first->second;
+ const auto next_index = static_cast<u32>(used_images.size()); // First use: append a new entry.
+ return used_images.emplace_back(next_index, offset, type);
}
Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
- const Node image_register{GetRegister(reg)};
- const auto [base_image, cbuf_index, cbuf_offset]{
- TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
- const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
-
- if (const auto image = TryUseExistingImage(cbuf_key, type)) {
- return *image;
- }
-
- const std::size_t next_index{used_images.size()};
- return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type})
- .first->second;
-}
-
-Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) {
- auto it = used_images.find(offset);
- if (it == used_images.end()) {
- return nullptr;
+ const Node image_register = GetRegister(reg);
+ const auto [base_image, buffer, offset] = // Track the register back to a const buffer slot.
+ TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
+
+ const auto it = // Bindless images are keyed by the (buffer, offset) pair.
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [buffer = buffer, offset = offset](const Image& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != std::end(used_images)) {
+ ASSERT(it->IsBindless() && it->GetType() == type); // Must match the requested type.
+ return *it;
}
- auto& image = it->second;
- ASSERT(image.GetType() == type);
- return &image;
+ const auto next_index = static_cast<u32>(used_images.size()); // First use: append a new entry.
+ return used_images.emplace_back(next_index, offset, buffer, type);
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7923d4d69..335d78146 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}();
const auto [real_address_base, base_address, descriptor] =
- TrackAndGetGlobalMemory(bb, instr, false);
+ TrackGlobalMemory(bb, instr, false);
const u32 count = GetUniformTypeElementsCount(type);
+ if (!real_address_base || !base_address) {
+ // Tracking failed, load zeroes.
+ for (u32 i = 0; i < count; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
+ }
+ break;
+ }
+
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
@@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}();
const auto [real_address_base, base_address, descriptor] =
- TrackAndGetGlobalMemory(bb, instr, true);
-
- // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
- SetTemporary(bb, 0, real_address_base);
+ TrackGlobalMemory(bb, instr, true);
+ if (!real_address_base || !base_address) {
+ // Tracking failed, skip the store.
+ break;
+ }
const u32 count = GetUniformTypeElementsCount(type);
for (u32 i = 0; i < count; ++i) {
- SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
- }
- for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
- const Node real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
+ const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-
- bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
+ const Node value = GetRegister(instr.gpr0.Value() + i);
+ bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
}
@@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return pc;
}
-std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
- Instruction instr,
- bool is_write) {
+std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
+ Instruction instr,
+ bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
const auto [base_address, index, offset] =
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT(base_address != nullptr);
+ ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
+ { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+ "Global memory tracking failed");
bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46e0f823..17cd45d3c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::MOV_SYS: {
- const Node value = [&]() {
+ const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
@@ -256,7 +256,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::DEPBAR: {
- LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
+ LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
default:
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index f6ee68a54..d419e9c45 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
Node op_a = GetRegister(instr.gpr8);
- Node op_b = [&]() {
+ Node op_b = [this, instr] {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b934a069..bb926a132 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
bool is_bindless = false;
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex_b.texture_type};
const bool is_array = instr.tex_b.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
- if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
- }
-
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
@@ -96,6 +84,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::TLD4_B: {
+ is_bindless = true;
+ [[fallthrough]];
+ }
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
@@ -103,24 +95,20 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
- if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
- }
-
const auto texture_type = instr.tld4.texture_type.Value();
- const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+ const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
+ : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
- const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+ const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
+ : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
WriteTexInstructionFloat(
- bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
+ bb, instr,
+ GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
- }
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
@@ -141,7 +129,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
const auto& sampler =
- GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
+ GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -150,25 +138,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
- WriteTexsInstructionFloat(bb, instr, values);
+ WriteTexsInstructionFloat(bb, instr, values, true);
break;
}
case OpCode::Id::TXQ_B:
is_bindless = true;
[[fallthrough]];
case OpCode::Id::TXQ: {
- if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
- }
-
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
- is_bindless
- ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
- false)
- : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+ is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
u32 indexer = 0;
switch (instr.txq.query_type) {
@@ -201,15 +182,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
- if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
- }
-
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
- const auto& sampler = is_bindless
- ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
- : GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler =
+ is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, false}});
std::vector<Node> coords;
@@ -250,25 +227,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
- if (instr.tld.nodep_flag) {
- LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
- }
-
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
break;
}
case OpCode::Id::TLDS: {
- const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+ const TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
- if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
- }
-
const Node4 components = GetTldsCode(instr, texture_type, is_array);
if (instr.tlds.fp32_flag) {
@@ -285,48 +254,84 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc;
}
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
- bool is_array, bool is_shadow) {
- const auto offset = static_cast<std::size_t>(sampler.index.Value());
+const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+ std::optional<SamplerInfo> sampler_info) {
+ const auto offset = static_cast<u32>(sampler.index.Value()); // Bound samplers are keyed by index.
+
+ TextureType type;
+ bool is_array;
+ bool is_shadow;
+ if (sampler_info) { // The instruction supplied the sampler description itself.
+ type = sampler_info->type;
+ is_array = sampler_info->is_array;
+ is_shadow = sampler_info->is_shadow;
+ } else if (const auto sampler = locker.ObtainBoundSampler(offset)) { // Otherwise ask the locker.
+ type = sampler->texture_type.Value();
+ is_array = sampler->is_array.Value() != 0;
+ is_shadow = sampler->is_shadow.Value() != 0;
+ } else { // Nothing known: warn and assume a plain 2D, non-array, non-shadow sampler.
+ LOG_WARNING(HW_GPU, "Unknown sampler info");
+ type = TextureType::Texture2D;
+ is_array = false;
+ is_shadow = false;
+ }
// If this sampler has already been used, return the existing mapping.
- const auto itr =
+ const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
- [&](const Sampler& entry) { return entry.GetOffset() == offset; });
- if (itr != used_samplers.end()) {
- ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
- itr->IsShadow() == is_shadow);
- return *itr;
+ [offset](const Sampler& entry) { return entry.GetOffset() == offset; });
+ if (it != used_samplers.end()) {
+ ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+ it->IsShadow() == is_shadow);
+ return *it;
}
// Otherwise create a new mapping for this sampler
- const std::size_t next_index = used_samplers.size();
- const Sampler entry{offset, next_index, type, is_array, is_shadow};
- return *used_samplers.emplace(entry).first;
+ const auto next_index = static_cast<u32>(used_samplers.size()); // New entry takes the next index.
+ return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow));
}
-const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
- bool is_array, bool is_shadow) {
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
+ std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
- const auto [base_sampler, cbuf_index, cbuf_offset] =
+ const auto [base_sampler, buffer, offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
- const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
+
+ TextureType type;
+ bool is_array;
+ bool is_shadow;
+ if (sampler_info) { // The instruction supplied the sampler description itself.
+ type = sampler_info->type;
+ is_array = sampler_info->is_array;
+ is_shadow = sampler_info->is_shadow;
+ } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) { // Ask the locker.
+ type = sampler->texture_type.Value();
+ is_array = sampler->is_array.Value() != 0;
+ is_shadow = sampler->is_shadow.Value() != 0;
+ } else { // Nothing known: warn and assume a plain 2D, non-array, non-shadow sampler.
+ LOG_WARNING(HW_GPU, "Unknown sampler info");
+ type = TextureType::Texture2D;
+ is_array = false;
+ is_shadow = false;
+ }
// If this sampler has already been used, return the existing mapping.
- const auto itr =
+ const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
- [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
- if (itr != used_samplers.end()) {
- ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
- itr->IsShadow() == is_shadow);
- return *itr;
+ [buffer = buffer, offset = offset](const Sampler& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != used_samplers.end()) {
+ ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+ it->IsShadow() == is_shadow);
+ return *it;
}
// Otherwise create a new mapping for this sampler
- const std::size_t next_index = used_samplers.size();
- const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
- return *used_samplers.emplace(entry).first;
+ const auto next_index = static_cast<u32>(used_samplers.size()); // New entry takes the next index.
+ return used_samplers.emplace_back(
+ Sampler(next_index, offset, buffer, type, is_array, is_shadow));
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -344,14 +349,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
}
}
-void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
- const Node4& components) {
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
+ bool ignore_mask) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component))
+ if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
continue;
SetTemporary(bb, dest_elem++, components[component]);
}
@@ -411,9 +416,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
- const auto& sampler = is_bindless
- ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
- : GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+ const auto& sampler =
+ is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
@@ -553,7 +558,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
- bool is_array, bool is_aoffi) {
+ bool is_array, bool is_aoffi, bool is_bindless) {
const std::size_t coord_count = GetCoordCount(texture_type);
// If enabled arrays index is always stored in the gpr8 field
@@ -567,6 +572,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
}
u64 parameter_register = instr.gpr20.Value();
+
+ const auto& sampler =
+ is_bindless
+ ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
+
std::vector<Node> aoffi;
if (is_aoffi) {
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
@@ -577,12 +588,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
dc = GetRegister(parameter_register++);
}
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+ const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
+ : Immediate(static_cast<u32>(instr.tld4.component));
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
+ MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
+ element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -610,7 +623,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -646,7 +659,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 97fc6f9b1..b047cf870 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
const Node op_a =
GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
instr.video.type_a, instr.video.byte_height_a);
- const Node op_b = [&]() {
+ const Node op_b = [this, instr] {
if (instr.video.use_register_b) {
return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
instr.video.signed_b, instr.video.type_b,
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index a8e481b3c..d98d0e1dd 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
+
OperationCode GetOperationCode(VoteOperation vote_op) {
switch (vote_op) {
case VoteOperation::All:
@@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
return OperationCode::VoteAll;
}
}
+
} // Anonymous namespace
u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
@@ -48,47 +50,57 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
case OpCode::Id::SHFL: {
Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
: GetRegister(instr.gpr39);
- Node width = [&] {
- // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
- // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
- // different parameters that don't properly map to GLSL's interface, but it should work
- // for cases emitted by Nvidia's compiler.
- if (instr.shfl.operation == ShuffleOperation::Up) {
- return Operation(
- OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
- Immediate(8));
- } else {
- return Operation(OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, Immediate(0x201F),
- Operation(OperationCode::INegate, std::move(mask))),
- Immediate(8));
- }
- }();
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
+
+ Node thread_id = Operation(OperationCode::ThreadId);
+ Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
+ Node seg_mask = BitfieldExtract(mask, 8, 16);
- const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
+ Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
+ Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
+ Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
+
+ Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
switch (instr.shfl.operation) {
case ShuffleOperation::Idx:
- return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
- case ShuffleOperation::Up:
- return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ return Operation(OperationCode::IBitwiseOr,
+ Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
+ min_thread_id);
case ShuffleOperation::Down:
- return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ return Operation(OperationCode::IAdd, thread_id, index);
+ case ShuffleOperation::Up:
+ return Operation(OperationCode::IAdd, thread_id,
+ Operation(OperationCode::INegate, index));
case ShuffleOperation::Bfly:
- return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ return Operation(OperationCode::IBitwiseXor, thread_id, index);
}
- UNREACHABLE_MSG("Invalid SHFL operation: {}",
- static_cast<u64>(instr.shfl.operation.Value()));
- return {};
+ UNREACHABLE();
+ return Immediate(0U);
}();
- // Setting the predicate before the register is intentional to avoid overwriting.
- Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
- : GetRegister(instr.gpr20);
- SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
+ } else {
+ return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
+ }
+ }();
+
+ SetPredicate(bb, instr.shfl.pred48, in_bounds);
SetRegister(
bb, instr.gpr0,
- Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
+ break;
+ }
+ case OpCode::Id::FSWZADD: {
+ UNIMPLEMENTED_IF(instr.fswzadd.ndv);
+
+ Node op_a = GetRegister(instr.gpr8);
+ Node op_b = GetRegister(instr.gpr20);
+ Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
+ SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
break;
}
default:
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
new file mode 100644
index 000000000..2647865d4
--- /dev/null
+++ b/src/video_core/shader/expr.cpp
@@ -0,0 +1,93 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <variant>
+
+#include "video_core/shader/expr.h"
+
+namespace VideoCommon::Shader {
+namespace { // File-local helpers for inspecting ExprBoolean nodes.
+bool ExprIsBoolean(const Expr& expr) { // True when the variant behind expr currently holds an ExprBoolean; dereferences expr.
+ return std::holds_alternative<ExprBoolean>(*expr);
+}
+
+bool ExprBooleanGet(const Expr& expr) { // Returns the bool stored in an ExprBoolean node.
+ return std::get_if<ExprBoolean>(expr.get())->value; // get_if's result is not null-checked: only call after ExprIsBoolean(expr).
+}
+} // Anonymous namespace
+
+bool ExprAnd::operator==(const ExprAnd& b) const { // Structural equality: compares the pointed-to ExprData of each operand, not the pointers.
+ return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); // Position-sensitive: operand1 is only matched against b.operand1.
+}
+
+bool ExprAnd::operator!=(const ExprAnd& b) const { // Exact negation of ExprAnd::operator==.
+ return !operator==(b);
+}
+
+bool ExprOr::operator==(const ExprOr& b) const { // Structural equality: compares the pointed-to ExprData of each operand, not the pointers.
+ return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); // Position-sensitive: operand1 is only matched against b.operand1.
+}
+
+bool ExprOr::operator!=(const ExprOr& b) const { // Exact negation of ExprOr::operator==.
+ return !operator==(b);
+}
+
+bool ExprNot::operator==(const ExprNot& b) const { // Two negations are equal when their negated sub-expressions compare equal by value.
+ return *operand1 == *b.operand1;
+}
+
+bool ExprNot::operator!=(const ExprNot& b) const { // Exact negation of ExprNot::operator==.
+ return !operator==(b);
+}
+
+Expr MakeExprNot(Expr first) { // Builds the negation of first, collapsing a double negation.
+ if (std::holds_alternative<ExprNot>(*first)) { // first is already NOT(x) ...
+ return std::get_if<ExprNot>(first.get())->operand1; // ... so hand back x itself instead of wrapping it again.
+ }
+ return MakeExpr<ExprNot>(std::move(first)); // Otherwise allocate a new ExprNot node around first.
+}
+
+Expr MakeExprAnd(Expr first, Expr second) { // Builds first AND second, folding away an operand that is a boolean constant.
+ if (ExprIsBoolean(first)) {
+ return ExprBooleanGet(first) ? second : first; // true AND x -> x; false AND x -> false (first itself).
+ }
+ if (ExprIsBoolean(second)) {
+ return ExprBooleanGet(second) ? first : second; // x AND true -> x; x AND false -> false (second itself).
+ }
+ return MakeExpr<ExprAnd>(std::move(first), std::move(second)); // Neither side is constant: allocate a real ExprAnd node.
+}
+
+Expr MakeExprOr(Expr first, Expr second) { // Builds first OR second, folding away an operand that is a boolean constant.
+ if (ExprIsBoolean(first)) {
+ return ExprBooleanGet(first) ? first : second; // true OR x -> true (first itself); false OR x -> x.
+ }
+ if (ExprIsBoolean(second)) {
+ return ExprBooleanGet(second) ? second : first; // x OR true -> true (second itself); x OR false -> x.
+ }
+ return MakeExpr<ExprOr>(std::move(first), std::move(second)); // Neither side is constant: allocate a real ExprOr node.
+}
+
+bool ExprAreEqual(const Expr& first, const Expr& second) { // Value comparison of the two pointed-to ExprData variants; both pointers are dereferenced.
+ return (*first) == (*second);
+}
+
+bool ExprAreOpposite(const Expr& first, const Expr& second) { // True when one expression is an ExprNot whose operand equals the other expression.
+ if (std::holds_alternative<ExprNot>(*first)) { // first is NOT(x): the answer is decided here and second is not inspected as an ExprNot.
+ return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
+ }
+ if (std::holds_alternative<ExprNot>(*second)) { // second is NOT(y): opposite when y equals first.
+ return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
+ }
+ return false; // Neither side is a top-level ExprNot; no deeper analysis is attempted.
+}
+
+bool ExprIsTrue(const Expr& first) { // True only for an ExprBoolean node holding true.
+ if (ExprIsBoolean(first)) {
+ return ExprBooleanGet(first);
+ }
+ return false; // Every other node kind is reported as not true; nothing is evaluated.
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
new file mode 100644
index 000000000..4e8264367
--- /dev/null
+++ b/src/video_core/shader/expr.h
@@ -0,0 +1,156 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <variant>
+
+#include "video_core/engines/shader_bytecode.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::ConditionCode;
+using Tegra::Shader::Pred;
+
+class ExprAnd;
+class ExprBoolean;
+class ExprCondCode;
+class ExprGprEqual;
+class ExprNot;
+class ExprOr;
+class ExprPredicate;
+class ExprVar;
+
+using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
+ ExprBoolean, ExprGprEqual>; // Sum type over every expression node class declared in this header.
+using Expr = std::shared_ptr<ExprData>; // Shared handle to a node; ExprAnd/ExprOr/ExprNot store these as operands.
+
+class ExprAnd final { // Binary AND node over two sub-expressions.
+public:
+ explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} // Takes both handles by value and moves them in.
+
+ bool operator==(const ExprAnd& b) const; // Declared here without a body.
+ bool operator!=(const ExprAnd& b) const;
+
+ Expr operand1; // Left-hand operand.
+ Expr operand2; // Right-hand operand.
+};
+
+class ExprOr final { // Binary OR node over two sub-expressions.
+public:
+ explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} // Takes both handles by value and moves them in.
+
+ bool operator==(const ExprOr& b) const; // Declared here without a body.
+ bool operator!=(const ExprOr& b) const;
+
+ Expr operand1; // Left-hand operand.
+ Expr operand2; // Right-hand operand.
+};
+
+class ExprNot final { // Unary NOT node wrapping a single sub-expression.
+public:
+ explicit ExprNot(Expr a) : operand1{std::move(a)} {} // Takes the handle by value and moves it in.
+
+ bool operator==(const ExprNot& b) const; // Declared here without a body.
+ bool operator!=(const ExprNot& b) const;
+
+ Expr operand1; // The negated sub-expression.
+};
+
+class ExprVar final { // Leaf node that refers to a variable by index.
+public:
+ explicit ExprVar(u32 index) : var_index{index} {}
+
+ bool operator==(const ExprVar& b) const { // Equal when both nodes carry the same index.
+ return var_index == b.var_index;
+ }
+
+ bool operator!=(const ExprVar& b) const {
+ return !operator==(b);
+ }
+
+ u32 var_index; // NOTE(review): what the index refers to is not visible in this header — confirm against the consumer.
+};
+
+class ExprPredicate final { // Leaf node that refers to a predicate by numeric id.
+public:
+ explicit ExprPredicate(u32 predicate) : predicate{predicate} {}
+
+ bool operator==(const ExprPredicate& b) const { // Equal when both nodes carry the same predicate id.
+ return predicate == b.predicate;
+ }
+
+ bool operator!=(const ExprPredicate& b) const {
+ return !operator==(b);
+ }
+
+ u32 predicate; // Stored as a raw u32; presumably a Tegra::Shader::Pred value — TODO confirm.
+};
+
+class ExprCondCode final { // Leaf node carrying a Tegra::Shader::ConditionCode.
+public:
+ explicit ExprCondCode(ConditionCode cc) : cc{cc} {}
+
+ bool operator==(const ExprCondCode& b) const { // Equal when both nodes carry the same condition code.
+ return cc == b.cc;
+ }
+
+ bool operator!=(const ExprCondCode& b) const {
+ return !operator==(b);
+ }
+
+ ConditionCode cc; // The condition code this node stands for.
+};
+
+class ExprBoolean final { // Leaf node holding a constant true/false.
+public:
+ explicit ExprBoolean(bool val) : value{val} {}
+
+ bool operator==(const ExprBoolean& b) const { // Equal when both constants have the same value.
+ return value == b.value;
+ }
+
+ bool operator!=(const ExprBoolean& b) const {
+ return !operator==(b);
+ }
+
+ bool value; // The constant itself.
+};
+
+class ExprGprEqual final { // Leaf node pairing a gpr index with a value; presumably means "register gpr equals value" — confirm against the consumer.
+public:
+ ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} // Unlike the other node constructors, this one is not marked explicit.
+
+ bool operator==(const ExprGprEqual& b) const { // Equal when both the gpr index and the value match.
+ return gpr == b.gpr && value == b.value;
+ }
+
+ bool operator!=(const ExprGprEqual& b) const {
+ return !operator==(b);
+ }
+
+ u32 gpr; // Register index.
+ u32 value; // Value paired with that register.
+};
+
+template <typename T, typename... Args>
+Expr MakeExpr(Args&&... args) { // Allocates a new shared ExprData initialised from a T constructed with args.
+ static_assert(std::is_convertible_v<T, ExprData>); // Rejects any T that cannot convert to ExprData. NOTE(review): <type_traits>/<utility> are not included directly by this header; presumably pulled in transitively.
+ return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
+}
+
+bool ExprAreEqual(const Expr& first, const Expr& second); // Value (not pointer) comparison of two expressions.
+
+bool ExprAreOpposite(const Expr& first, const Expr& second); // True when one side is an ExprNot of something equal to the other side.
+
+Expr MakeExprNot(Expr first); // Negates first; NOT(NOT(x)) collapses to x.
+
+Expr MakeExprAnd(Expr first, Expr second); // AND of both; an ExprBoolean operand is folded away.
+
+Expr MakeExprOr(Expr first, Expr second); // OR of both; an ExprBoolean operand is folded away.
+
+bool ExprIsTrue(const Expr& first); // True only when first is an ExprBoolean holding true.
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 338bab17c..54217e6a4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -47,6 +47,7 @@ enum class OperationCode {
FTrunc, /// (MetaArithmetic, float a) -> float
FCastInteger, /// (MetaArithmetic, int a) -> float
FCastUInteger, /// (MetaArithmetic, uint a) -> float
+ FSwizzleAdd, /// (float a, float b, uint mask) -> float
IAdd, /// (MetaArithmetic, int a, int b) -> int
IMul, /// (MetaArithmetic, int a, int b) -> int
@@ -181,15 +182,8 @@ enum class OperationCode {
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
- ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
- ShuffleUp, /// (uint value, uint index, uint width) -> uint
- ShuffleDown, /// (uint value, uint index, uint width) -> uint
- ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
-
- InRangeShuffleIndexed, /// (uint index, uint width) -> bool
- InRangeShuffleUp, /// (uint index, uint width) -> bool
- InRangeShuffleDown, /// (uint index, uint width) -> bool
- InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+ ThreadId, /// () -> uint
+ ShuffleIndexed, /// (uint value, uint index) -> uint
Amount,
};
@@ -230,62 +224,49 @@ using NodeBlock = std::vector<Node>;
class Sampler {
public:
/// This constructor is for bound samplers
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{false} {}
+ constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
/// This constructor is for bindless samplers
- explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
-
- /// This constructor is for serialization/deserialization
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_bindless)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{is_bindless} {}
-
- std::size_t GetOffset() const {
+ constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
+ is_shadow{is_shadow}, is_bindless{true} {}
+
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
return offset;
}
- std::size_t GetIndex() const {
- return index;
+ constexpr u32 GetBuffer() const {
+ return buffer;
}
- Tegra::Shader::TextureType GetType() const {
+ constexpr Tegra::Shader::TextureType GetType() const {
return type;
}
- bool IsArray() const {
+ constexpr bool IsArray() const {
return is_array;
}
- bool IsShadow() const {
+ constexpr bool IsShadow() const {
return is_shadow;
}
- bool IsBindless() const {
+ constexpr bool IsBindless() const {
return is_bindless;
}
- std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- bool operator<(const Sampler& rhs) const {
- return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
- std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
- rhs.is_bindless);
- }
-
private:
- /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
- /// instruction.
- std::size_t offset{};
- std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
+ u32 index{}; ///< Emulated index given for this sampler.
+ u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
+ u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
@@ -294,18 +275,13 @@ private:
class Image final {
public:
- constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
- : offset{offset}, index{index}, type{type}, is_bindless{false} {}
+ /// This constructor is for bound images
+ constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
+ : index{index}, offset{offset}, type{type} {}
- constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::ImageType type)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_bindless{true} {}
-
- constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
- bool is_bindless, bool is_written, bool is_read, bool is_atomic)
- : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
- is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {}
+ /// This constructor is for bindless images
+ constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
+ : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
void MarkWrite() {
is_written = true;
@@ -321,12 +297,16 @@ public:
is_atomic = true;
}
- constexpr std::size_t GetOffset() const {
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
return offset;
}
- constexpr std::size_t GetIndex() const {
- return index;
+ constexpr u32 GetBuffer() const {
+ return buffer;
}
constexpr Tegra::Shader::ImageType GetType() const {
@@ -349,18 +329,11 @@ public:
return is_atomic;
}
- constexpr std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- constexpr bool operator<(const Image& rhs) const {
- return std::tie(offset, index, type, is_bindless) <
- std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
- }
-
private:
- u64 offset{};
- std::size_t index{};
+ u32 index{};
+ u32 offset{};
+ u32 buffer{};
+
Tegra::Shader::ImageType type{};
bool is_bindless{};
bool is_written{};
@@ -410,7 +383,7 @@ public:
explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
explicit OperationNode(OperationCode code, Meta meta)
- : OperationNode(code, meta, std::vector<Node>{}) {}
+ : OperationNode(code, std::move(meta), std::vector<Node>{}) {}
explicit OperationNode(OperationCode code, std::vector<Node> operands)
: OperationNode(code, Meta{}, std::move(operands)) {}
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 2c357f310..1d9825c76 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -2,8 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
+#include <array>
#include <cmath>
-#include <unordered_map>
#include "common/assert.h"
#include "common/common_types.h"
@@ -22,8 +23,9 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size)
- : program_code{program_code}, main_offset{main_offset}, program_size{size} {
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
+ ConstBufferLocker& locker)
+ : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
Decode();
}
@@ -137,7 +139,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}
-Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
+Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
const Node node = MakeNode<InternalFlagNode>(flag);
if (negated) {
return Operation(OperationCode::LogicalNegate, node);
@@ -269,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
}
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::LogicalFLessThan},
- {PredCondition::Equal, OperationCode::LogicalFEqual},
- {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
- {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
- {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
- {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
- {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
- {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
- {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
- {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{
+ std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan},
+ std::pair{PredCondition::Equal, OperationCode::LogicalFEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual},
+ };
+
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; });
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
@@ -304,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
Node op_b) {
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::LogicalILessThan},
- {PredCondition::Equal, OperationCode::LogicalIEqual},
- {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
- {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
- {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
- {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
- {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
- {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
- {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
- {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{
+ std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan},
+ std::pair{PredCondition::Equal, OperationCode::LogicalIEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
+ };
+
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; });
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
@@ -335,45 +343,52 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
Node op_b) { // Builds the Logical2H* comparison node selected by condition for op_a and op_b.
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::Logical2HLessThan},
- {PredCondition::Equal, OperationCode::Logical2HEqual},
- {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
- {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
- {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
- {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
- {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
- {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
- {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
- {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{ // Pairs each PredCondition with its half-float (2H) comparison opcode.
+ std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan},
+ std::pair{PredCondition::Equal, OperationCode::Logical2HEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan},
+ };
+
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; }); // Linear scan for the entry keyed by condition.
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); // NOTE(review): dereferences comparison even on a failed lookup unless UNIMPLEMENTED_IF_MSG aborts — confirm.
}
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { // Translates a PredOperation into the matching logical OperationCode.
- const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
- {PredOperation::And, OperationCode::LogicalAnd},
- {PredOperation::Or, OperationCode::LogicalOr},
- {PredOperation::Xor, OperationCode::LogicalXor},
+ static constexpr std::array operation_table{ // Indexed by the numeric value of PredOperation — assumes And=0, Or=1, Xor=2; TODO confirm against the enum definition.
+ OperationCode::LogicalAnd,
+ OperationCode::LogicalOr,
+ OperationCode::LogicalXor,
};
- const auto op = PredicateOperationTable.find(operation);
- UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation");
- return op->second;
+ const auto index = static_cast<std::size_t>(operation);
+ if (index >= operation_table.size()) { // Enum value with no slot in the table.
+ UNIMPLEMENTED_MSG("Unknown predicate operation.");
+ return {}; // Value-initialized OperationCode, i.e. whichever enumerator has value 0.
+ }
+
+ return operation_table[index];
}
-Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
+Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { // Returns a node evaluating cc; only NEU is handled here.
switch (cc) {
case Tegra::Shader::ConditionCode::NEU:
return GetInternalFlag(InternalFlag::Zero, true); // NEU is expressed as the negated Zero internal flag.
default:
UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
- return GetPredicate(static_cast<u64>(Pred::NeverExecute));
+ return MakeNode<PredicateNode>(Pred::NeverExecute, false); // Fallback for unhandled codes: a NeverExecute predicate node built directly.
}
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6f666ee30..76a849818 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <list>
#include <map>
#include <optional>
#include <set>
@@ -15,6 +16,9 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
namespace VideoCommon::Shader {
@@ -45,7 +49,7 @@ public:
}
u32 GetSize() const {
- return max_offset + sizeof(float);
+ return max_offset + static_cast<u32>(sizeof(float));
}
u32 GetMaxOffset() const {
@@ -64,7 +68,8 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
- explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size);
+ explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
+ ConstBufferLocker& locker);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -91,11 +96,11 @@ public:
return used_cbufs;
}
- const std::set<Sampler>& GetSamplers() const {
+ const std::list<Sampler>& GetSamplers() const {
return used_samplers;
}
- const std::map<u64, Image>& GetImages() const {
+ const std::list<Image>& GetImages() const {
return used_images;
}
@@ -144,11 +149,38 @@ public:
return disable_flow_stack;
}
- u32 ConvertAddressToNvidiaSpace(const u32 address) const {
- return (address - main_offset) * sizeof(Tegra::Shader::Instruction);
+ bool IsDecompiled() const {
+ return decompiled;
}
+ const ASTManager& GetASTManager() const {
+ return program_manager;
+ }
+
+ ASTNode GetASTProgram() const {
+ return program_manager.GetProgram();
+ }
+
+ u32 GetASTNumVariables() const {
+ return program_manager.GetVariables();
+ }
+
+ u32 ConvertAddressToNvidiaSpace(u32 address) const {
+ return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
+ }
+
+ /// Returns a condition code evaluated from internal flags
+ Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
+
private:
+ friend class ASTDecoder;
+
+ struct SamplerInfo { // Bundles the sampler properties that GetSampler/GetBindlessSampler take as one optional argument.
+ Tegra::Shader::TextureType type;
+ bool is_array;
+ bool is_shadow;
+ };
+
void Decode();
NodeBlock DecodeRange(u32 begin, u32 end);
@@ -213,7 +245,7 @@ private:
/// Generates a node representing an output attribute. Keeps track of used attributes.
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
/// Generates a node representing an internal flag
- Node GetInternalFlag(InternalFlag flag, bool negated = false);
+ Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
/// Generates a node representing a local memory address
Node GetLocalMemory(Node address);
/// Generates a node representing a shared memory address
@@ -271,17 +303,13 @@ private:
/// Returns a predicate combiner operation
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
- /// Returns a condition code evaluated from internal flags
- Node GetConditionCode(Tegra::Shader::ConditionCode cc);
-
/// Accesses a texture sampler
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
+ std::optional<SamplerInfo> sampler_info);
// Accesses a texture sampler for a bindless texture.
const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
- Tegra::Shader::TextureType type, bool is_array,
- bool is_shadow);
+ std::optional<SamplerInfo> sampler_info);
/// Accesses an image.
Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -289,9 +317,6 @@ private:
/// Access a bindless image sampler.
Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
- /// Tries to access an existing image, updating it's state as needed
- Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type);
-
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -302,7 +327,7 @@ private:
const Node4& components);
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components);
+ const Node4& components, bool ignore_mask = false);
void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
@@ -316,7 +341,7 @@ private:
bool is_array);
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool depth_compare, bool is_array, bool is_aoffi);
+ bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
Node4 GetTldCode(Tegra::Shader::Instruction instr);
@@ -351,12 +376,16 @@ private:
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) const;
- std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
- NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
+ std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
+ Tegra::Shader::Instruction instr,
+ bool is_write);
const ProgramCode& program_code;
const u32 main_offset;
- const std::size_t program_size;
+ const CompilerSettings settings;
+ ConstBufferLocker& locker;
+
+ bool decompiled{};
bool disable_flow_stack{};
u32 coverage_begin{};
@@ -364,14 +393,15 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
+ ASTManager program_manager{true, true};
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
- std::set<Sampler> used_samplers;
- std::map<u64, Image> used_images;
+ std::list<Sampler> used_samplers;
+ std::list<Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_layer{};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 250afc6d6..1655ccf16 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -168,282 +168,6 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
}
}
-PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
- Tegra::Texture::ComponentType component_type,
- bool is_srgb) {
- // TODO(Subv): Properly implement this
- switch (format) {
- case Tegra::Texture::TextureFormat::A8R8G8B8:
- if (is_srgb) {
- return PixelFormat::RGBA8_SRGB;
- }
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::ABGR8U;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::ABGR8S;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::ABGR8UI;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::B5G6R5:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::B5G6R5U;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::A2B10G10R10:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::A2B10G10R10U;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::A1B5G5R5:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::A1B5G5R5U;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::R8:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::R8U;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::R8UI;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::G8R8:
- // TextureFormat::G8R8 is actually ordered red then green, as such we can use
- // PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath
- // of the Wild, which uses this format to render the hearts on the UI.
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::RG8U;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::RG8S;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::RGBA16U;
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RGBA16F;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::BF10GF11RF11:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::R11FG11FB10F;
- default:
- break;
- }
- case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RGBA32F;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::RGBA32UI;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::R32_G32:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RG32F;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::RG32UI;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::R32_G32_B32:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RGB32F;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::R16:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::R16F;
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::R16U;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::R16S;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::R16UI;
- case Tegra::Texture::ComponentType::SINT:
- return PixelFormat::R16I;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::R32:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::R32F;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::R32UI;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::ZF32:
- return PixelFormat::Z32F;
- case Tegra::Texture::TextureFormat::Z16:
- return PixelFormat::Z16;
- case Tegra::Texture::TextureFormat::S8Z24:
- return PixelFormat::S8Z24;
- case Tegra::Texture::TextureFormat::ZF32_X24S8:
- return PixelFormat::Z32FS8;
- case Tegra::Texture::TextureFormat::DXT1:
- return is_srgb ? PixelFormat::DXT1_SRGB : PixelFormat::DXT1;
- case Tegra::Texture::TextureFormat::DXT23:
- return is_srgb ? PixelFormat::DXT23_SRGB : PixelFormat::DXT23;
- case Tegra::Texture::TextureFormat::DXT45:
- return is_srgb ? PixelFormat::DXT45_SRGB : PixelFormat::DXT45;
- case Tegra::Texture::TextureFormat::DXN1:
- return PixelFormat::DXN1;
- case Tegra::Texture::TextureFormat::DXN2:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::DXN2UNORM;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::DXN2SNORM;
- default:
- break;
- }
- break;
- case Tegra::Texture::TextureFormat::BC7U:
- return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U;
- case Tegra::Texture::TextureFormat::BC6H_UF16:
- return PixelFormat::BC6H_UF16;
- case Tegra::Texture::TextureFormat::BC6H_SF16:
- return PixelFormat::BC6H_SF16;
- case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
- return is_srgb ? PixelFormat::ASTC_2D_4X4_SRGB : PixelFormat::ASTC_2D_4X4;
- case Tegra::Texture::TextureFormat::ASTC_2D_5X4:
- return is_srgb ? PixelFormat::ASTC_2D_5X4_SRGB : PixelFormat::ASTC_2D_5X4;
- case Tegra::Texture::TextureFormat::ASTC_2D_5X5:
- return is_srgb ? PixelFormat::ASTC_2D_5X5_SRGB : PixelFormat::ASTC_2D_5X5;
- case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
- return is_srgb ? PixelFormat::ASTC_2D_8X8_SRGB : PixelFormat::ASTC_2D_8X8;
- case Tegra::Texture::TextureFormat::ASTC_2D_8X5:
- return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5;
- case Tegra::Texture::TextureFormat::ASTC_2D_10X8:
- return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8;
- case Tegra::Texture::TextureFormat::R16_G16:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RG16F;
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::RG16;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::RG16S;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::RG16UI;
- case Tegra::Texture::ComponentType::SINT:
- return PixelFormat::RG16I;
- default:
- break;
- }
- break;
- default:
- break;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
- static_cast<u32>(component_type));
- UNREACHABLE();
- return PixelFormat::ABGR8U;
-}
-
-ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
- // TODO(Subv): Implement more component types
- switch (type) {
- case Tegra::Texture::ComponentType::UNORM:
- return ComponentType::UNorm;
- case Tegra::Texture::ComponentType::FLOAT:
- return ComponentType::Float;
- case Tegra::Texture::ComponentType::SNORM:
- return ComponentType::SNorm;
- case Tegra::Texture::ComponentType::UINT:
- return ComponentType::UInt;
- case Tegra::Texture::ComponentType::SINT:
- return ComponentType::SInt;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
- UNREACHABLE();
- return ComponentType::UNorm;
- }
-}
-
-ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
- // TODO(Subv): Implement more render targets
- switch (format) {
- case Tegra::RenderTargetFormat::RGBA8_UNORM:
- case Tegra::RenderTargetFormat::RGBA8_SRGB:
- case Tegra::RenderTargetFormat::BGRA8_UNORM:
- case Tegra::RenderTargetFormat::BGRA8_SRGB:
- case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
- case Tegra::RenderTargetFormat::R8_UNORM:
- case Tegra::RenderTargetFormat::RG16_UNORM:
- case Tegra::RenderTargetFormat::R16_UNORM:
- case Tegra::RenderTargetFormat::B5G6R5_UNORM:
- case Tegra::RenderTargetFormat::BGR5A1_UNORM:
- case Tegra::RenderTargetFormat::RG8_UNORM:
- case Tegra::RenderTargetFormat::RGBA16_UNORM:
- return ComponentType::UNorm;
- case Tegra::RenderTargetFormat::RGBA8_SNORM:
- case Tegra::RenderTargetFormat::RG16_SNORM:
- case Tegra::RenderTargetFormat::R16_SNORM:
- case Tegra::RenderTargetFormat::RG8_SNORM:
- return ComponentType::SNorm;
- case Tegra::RenderTargetFormat::RGBA16_FLOAT:
- case Tegra::RenderTargetFormat::RGBX16_FLOAT:
- case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
- case Tegra::RenderTargetFormat::RGBA32_FLOAT:
- case Tegra::RenderTargetFormat::RG32_FLOAT:
- case Tegra::RenderTargetFormat::RG16_FLOAT:
- case Tegra::RenderTargetFormat::R16_FLOAT:
- case Tegra::RenderTargetFormat::R32_FLOAT:
- return ComponentType::Float;
- case Tegra::RenderTargetFormat::RGBA32_UINT:
- case Tegra::RenderTargetFormat::RGBA16_UINT:
- case Tegra::RenderTargetFormat::RG16_UINT:
- case Tegra::RenderTargetFormat::R8_UINT:
- case Tegra::RenderTargetFormat::R16_UINT:
- case Tegra::RenderTargetFormat::RG32_UINT:
- case Tegra::RenderTargetFormat::R32_UINT:
- case Tegra::RenderTargetFormat::RGBA8_UINT:
- return ComponentType::UInt;
- case Tegra::RenderTargetFormat::RG16_SINT:
- case Tegra::RenderTargetFormat::R16_SINT:
- return ComponentType::SInt;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- return ComponentType::UNorm;
- }
-}
-
PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
@@ -458,22 +182,6 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
}
}
-ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
- switch (format) {
- case Tegra::DepthFormat::Z16_UNORM:
- case Tegra::DepthFormat::S8_Z24_UNORM:
- case Tegra::DepthFormat::Z24_S8_UNORM:
- return ComponentType::UNorm;
- case Tegra::DepthFormat::Z32_FLOAT:
- case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
- return ComponentType::Float;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- return ComponentType::UNorm;
- }
-}
-
SurfaceType GetFormatType(PixelFormat pixel_format) {
if (static_cast<std::size_t>(pixel_format) <
static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
@@ -510,6 +218,16 @@ bool IsPixelFormatASTC(PixelFormat format) {
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_10X8:
case PixelFormat::ASTC_2D_10X8_SRGB:
+ case PixelFormat::ASTC_2D_6X6:
+ case PixelFormat::ASTC_2D_6X6_SRGB:
+ case PixelFormat::ASTC_2D_10X10:
+ case PixelFormat::ASTC_2D_10X10_SRGB:
+ case PixelFormat::ASTC_2D_12X12:
+ case PixelFormat::ASTC_2D_12X12_SRGB:
+ case PixelFormat::ASTC_2D_8X6:
+ case PixelFormat::ASTC_2D_8X6_SRGB:
+ case PixelFormat::ASTC_2D_6X5:
+ case PixelFormat::ASTC_2D_6X5_SRGB:
return true;
default:
return false;
@@ -530,6 +248,11 @@ bool IsPixelFormatSRGB(PixelFormat format) {
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_10X8_SRGB:
+ case PixelFormat::ASTC_2D_6X6_SRGB:
+ case PixelFormat::ASTC_2D_10X10_SRGB:
+ case PixelFormat::ASTC_2D_12X12_SRGB:
+ case PixelFormat::ASTC_2D_8X6_SRGB:
+ case PixelFormat::ASTC_2D_6X5_SRGB:
return true;
default:
return false;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 1e1c432a5..0d17a93ed 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -67,45 +67,47 @@ enum class PixelFormat {
DXT23_SRGB = 49,
DXT45_SRGB = 50,
BC7U_SRGB = 51,
- ASTC_2D_4X4_SRGB = 52,
- ASTC_2D_8X8_SRGB = 53,
- ASTC_2D_8X5_SRGB = 54,
- ASTC_2D_5X4_SRGB = 55,
- ASTC_2D_5X5 = 56,
- ASTC_2D_5X5_SRGB = 57,
- ASTC_2D_10X8 = 58,
- ASTC_2D_10X8_SRGB = 59,
+ R4G4B4A4U = 52,
+ ASTC_2D_4X4_SRGB = 53,
+ ASTC_2D_8X8_SRGB = 54,
+ ASTC_2D_8X5_SRGB = 55,
+ ASTC_2D_5X4_SRGB = 56,
+ ASTC_2D_5X5 = 57,
+ ASTC_2D_5X5_SRGB = 58,
+ ASTC_2D_10X8 = 59,
+ ASTC_2D_10X8_SRGB = 60,
+ ASTC_2D_6X6 = 61,
+ ASTC_2D_6X6_SRGB = 62,
+ ASTC_2D_10X10 = 63,
+ ASTC_2D_10X10_SRGB = 64,
+ ASTC_2D_12X12 = 65,
+ ASTC_2D_12X12_SRGB = 66,
+ ASTC_2D_8X6 = 67,
+ ASTC_2D_8X6_SRGB = 68,
+ ASTC_2D_6X5 = 69,
+ ASTC_2D_6X5_SRGB = 70,
+ E5B9G9R9F = 71,
MaxColorFormat,
// Depth formats
- Z32F = 60,
- Z16 = 61,
+ Z32F = 72,
+ Z16 = 73,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 62,
- S8Z24 = 63,
- Z32FS8 = 64,
+ Z24S8 = 74,
+ S8Z24 = 75,
+ Z32FS8 = 76,
MaxDepthStencilFormat,
Max = MaxDepthStencilFormat,
Invalid = 255,
};
-
static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
-enum class ComponentType {
- Invalid = 0,
- SNorm = 1,
- UNorm = 2,
- SInt = 3,
- UInt = 4,
- Float = 5,
-};
-
enum class SurfaceType {
ColorTexture = 0,
Depth = 1,
@@ -177,6 +179,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
2, // DXT23_SRGB
2, // DXT45_SRGB
2, // BC7U_SRGB
+ 0, // R4G4B4A4U
2, // ASTC_2D_4X4_SRGB
2, // ASTC_2D_8X8_SRGB
2, // ASTC_2D_8X5_SRGB
@@ -185,6 +188,17 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
2, // ASTC_2D_5X5_SRGB
2, // ASTC_2D_10X8
2, // ASTC_2D_10X8_SRGB
+ 2, // ASTC_2D_6X6
+ 2, // ASTC_2D_6X6_SRGB
+ 2, // ASTC_2D_10X10
+ 2, // ASTC_2D_10X10_SRGB
+ 2, // ASTC_2D_12X12
+ 2, // ASTC_2D_12X12_SRGB
+ 2, // ASTC_2D_8X6
+ 2, // ASTC_2D_8X6_SRGB
+ 2, // ASTC_2D_6X5
+ 2, // ASTC_2D_6X5_SRGB
+ 0, // E5B9G9R9F
0, // Z32F
0, // Z16
0, // Z24S8
@@ -261,6 +275,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
4, // DXT23_SRGB
4, // DXT45_SRGB
4, // BC7U_SRGB
+ 1, // R4G4B4A4U
4, // ASTC_2D_4X4_SRGB
8, // ASTC_2D_8X8_SRGB
8, // ASTC_2D_8X5_SRGB
@@ -269,6 +284,17 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
5, // ASTC_2D_5X5_SRGB
10, // ASTC_2D_10X8
10, // ASTC_2D_10X8_SRGB
+ 6, // ASTC_2D_6X6
+ 6, // ASTC_2D_6X6_SRGB
+ 10, // ASTC_2D_10X10
+ 10, // ASTC_2D_10X10_SRGB
+ 12, // ASTC_2D_12X12
+ 12, // ASTC_2D_12X12_SRGB
+ 8, // ASTC_2D_8X6
+ 8, // ASTC_2D_8X6_SRGB
+ 6, // ASTC_2D_6X5
+ 6, // ASTC_2D_6X5_SRGB
+ 1, // E5B9G9R9F
1, // Z32F
1, // Z16
1, // Z24S8
@@ -285,71 +311,83 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
}
constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG32UI
- 1, // RGBX16F
- 1, // R32UI
- 8, // ASTC_2D_8X8
- 5, // ASTC_2D_8X5
- 4, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 4, // ASTC_2D_4X4_SRGB
- 8, // ASTC_2D_8X8_SRGB
- 5, // ASTC_2D_8X5_SRGB
- 4, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
- 5, // ASTC_2D_5X5_SRGB
- 8, // ASTC_2D_10X8
- 8, // ASTC_2D_10X8_SRGB
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // RGBX16F
+ 1, // R32UI
+ 8, // ASTC_2D_8X8
+ 5, // ASTC_2D_8X5
+ 4, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 1, // R4G4B4A4U
+ 4, // ASTC_2D_4X4_SRGB
+ 8, // ASTC_2D_8X8_SRGB
+ 5, // ASTC_2D_8X5_SRGB
+ 4, // ASTC_2D_5X4_SRGB
+ 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_SRGB
+ 8, // ASTC_2D_10X8
+ 8, // ASTC_2D_10X8_SRGB
+ 6, // ASTC_2D_6X6
+ 6, // ASTC_2D_6X6_SRGB
+ 10, // ASTC_2D_10X10
+ 10, // ASTC_2D_10X10_SRGB
+ 12, // ASTC_2D_12X12
+ 12, // ASTC_2D_12X12_SRGB
+ 6, // ASTC_2D_8X6
+ 6, // ASTC_2D_8X6_SRGB
+ 5, // ASTC_2D_6X5
+ 5, // ASTC_2D_6X5_SRGB
+ 1, // E5B9G9R9F
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
}};
static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
@@ -413,6 +451,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
128, // DXT23_SRGB
128, // DXT45_SRGB
- 128, // BC7U
+ 128, // BC7U_SRGB
+ 16, // R4G4B4A4U
128, // ASTC_2D_4X4_SRGB
128, // ASTC_2D_8X8_SRGB
128, // ASTC_2D_8X5_SRGB
@@ -421,6 +460,17 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
128, // ASTC_2D_5X5_SRGB
128, // ASTC_2D_10X8
128, // ASTC_2D_10X8_SRGB
+ 128, // ASTC_2D_6X6
+ 128, // ASTC_2D_6X6_SRGB
+ 128, // ASTC_2D_10X10
+ 128, // ASTC_2D_10X10_SRGB
+ 128, // ASTC_2D_12X12
+ 128, // ASTC_2D_12X12_SRGB
+ 128, // ASTC_2D_8X6
+ 128, // ASTC_2D_8X6_SRGB
+ 128, // ASTC_2D_6X5
+ 128, // ASTC_2D_6X5_SRGB
+ 32, // E5B9G9R9F
32, // Z32F
16, // Z16
32, // Z24S8
@@ -504,6 +554,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::Compressed, // DXT23_SRGB
SurfaceCompression::Compressed, // DXT45_SRGB
SurfaceCompression::Compressed, // BC7U_SRGB
+ SurfaceCompression::None, // R4G4B4A4U
SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB
@@ -512,6 +563,17 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB
SurfaceCompression::Converted, // ASTC_2D_10X8
SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_6X6
+ SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_10X10
+ SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_12X12
+ SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_8X6
+ SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB
+ SurfaceCompression::Converted, // ASTC_2D_6X5
+ SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB
+ SurfaceCompression::None, // E5B9G9R9F
SurfaceCompression::None, // Z32F
SurfaceCompression::None, // Z16
SurfaceCompression::None, // Z24S8
@@ -537,18 +599,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);
-PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
- Tegra::Texture::ComponentType component_type,
- bool is_srgb);
-
-ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type);
-
-ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format);
-
PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format);
-ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format);
-
SurfaceType GetFormatType(PixelFormat pixel_format);
bool IsPixelFormatASTC(PixelFormat format);
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
new file mode 100644
index 000000000..271e67533
--- /dev/null
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -0,0 +1,208 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::ComponentType;
+using Tegra::Texture::TextureFormat;
+using VideoCore::Surface::PixelFormat;
+
+namespace {
+
+constexpr auto SNORM = ComponentType::SNORM; // Short aliases that keep the DefinitionTable rows compact.
+constexpr auto UNORM = ComponentType::UNORM;
+constexpr auto SINT = ComponentType::SINT;
+constexpr auto UINT = ComponentType::UINT;
+constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16;
+constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16;
+constexpr auto FLOAT = ComponentType::FLOAT;
+constexpr bool C = false; // Normal color
+constexpr bool S = true; // Srgb
+
+struct Table { // One DefinitionTable row: a texture format, sRGB flag and four component types, plus the pixel format they map to.
+ constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component,
+ ComponentType green_component, ComponentType blue_component,
+ ComponentType alpha_component, PixelFormat pixel_format)
+ : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component},
+ green_component{green_component}, blue_component{blue_component},
+ alpha_component{alpha_component}, is_srgb{is_srgb} {} // Initializers follow member declaration order, which differs from the parameter order.
+
+ TextureFormat texture_format;
+ PixelFormat pixel_format;
+ ComponentType red_component;
+ ComponentType green_component;
+ ComponentType blue_component;
+ ComponentType alpha_component;
+ bool is_srgb;
+};
+constexpr std::array<Table, 74> DefinitionTable = {{ // Row columns: texture format, C/S (sRGB flag), red, green, blue, alpha component types, pixel format.
+ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
+ {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
+ {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
+ {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB},
+
+ {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U},
+
+ {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U},
+
+ {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U},
+
+ {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U},
+
+ {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U},
+ {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI},
+
+ {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
+ {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
+
+ {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
+ {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
+ {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
+
+ {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F},
+ {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16},
+ {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S},
+ {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI},
+ {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I},
+
+ {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F},
+ {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U},
+ {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S},
+ {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI},
+ {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I},
+
+ {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F},
+
+ {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA32F},
+ {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI},
+
+ {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F},
+
+ {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F},
+ {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI},
+
+ {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
+ {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
+
+ {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
+
+ {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
+ {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
+ {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
+ {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8}, // NOTE(review): ZF32 above is keyed with FLOAT but this row uses UNORM for every channel - confirm this is the intended key.
+
+ {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
+ {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
+
+ {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23},
+ {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB},
+
+ {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45},
+ {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB},
+
+ // TODO: Use a different pixel format for SNORM
+ {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1},
+ {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1},
+
+ {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM},
+ {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM},
+
+ {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U},
+ {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB},
+
+ {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16},
+ {TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16},
+
+ {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4},
+ {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
+
+ {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4},
+ {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
+
+ {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5},
+ {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
+
+ {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8},
+ {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
+
+ {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5},
+ {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
+
+ {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8},
+ {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
+
+ {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6},
+ {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
+
+ {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10},
+ {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
+
+ {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12},
+ {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
+
+ {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6},
+ {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
+
+ {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5},
+ {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
+}};
+
+} // Anonymous namespace
+
+// Builds the lookup table: every slot starts out as PixelFormat::Invalid, then each entry of
+// DefinitionTable overwrites the slot addressed by its texture format, sRGB flag and
+// component types.
+FormatLookupTable::FormatLookupTable() {
+    const auto invalid_slot = static_cast<u8>(PixelFormat::Invalid);
+    table.fill(invalid_slot);
+
+    for (const auto& definition : DefinitionTable) {
+        const bool is_srgb = definition.is_srgb != 0;
+        const std::size_t slot =
+            CalculateIndex(definition.texture_format, is_srgb, definition.red_component,
+                           definition.green_component, definition.blue_component,
+                           definition.alpha_component);
+        table[slot] = static_cast<u8>(definition.pixel_format);
+    }
+}
+
+// Looks up the pixel format registered for a texture format, sRGB flag and per-channel
+// component types. Combinations without a registered entry are reported through
+// UNIMPLEMENTED_MSG and resolved to PixelFormat::ABGR8U as a fallback.
+PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb,
+                                              ComponentType red_component,
+                                              ComponentType green_component,
+                                              ComponentType blue_component,
+                                              ComponentType alpha_component) const noexcept {
+    const std::size_t slot = CalculateIndex(format, is_srgb, red_component, green_component,
+                                            blue_component, alpha_component);
+    const auto found_format = static_cast<PixelFormat>(table[slot]);
+    // [[unlikely]]
+    if (found_format == PixelFormat::Invalid) {
+        UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
+                          static_cast<int>(format), is_srgb, static_cast<int>(red_component),
+                          static_cast<int>(green_component), static_cast<int>(blue_component),
+                          static_cast<int>(alpha_component));
+        return PixelFormat::ABGR8U;
+    }
+    return found_format;
+}
+
+// Registers pixel_format as the result for the given texture format, sRGB flag and component
+// types by writing it into the slot that CalculateIndex assigns to that combination.
+// Previously this was an empty stub that silently discarded all of its arguments.
+void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
+                            ComponentType green_component, ComponentType blue_component,
+                            ComponentType alpha_component, PixelFormat pixel_format) {
+    table[CalculateIndex(format, is_srgb, red_component, green_component, blue_component,
+                         alpha_component)] = static_cast<u8>(pixel_format);
+}
+
+// Flattens a (format, sRGB, red, green, blue, alpha) combination into an index into `table`.
+//
+// Layout, from most to least significant:
+//   format | srgb | alpha | blue | green | red
+// where each component occupies PerComponent values and the sRGB flag occupies two.
+//
+// Fix: the green, blue and alpha indices used to be computed from red_component, so only the
+// red component took part in the lookup and entries differing in other channels collided.
+//
+// NOTE(review): assumes every ComponentType value is below PerComponent and every
+// TextureFormat value is below NumTextureFormats - confirm against the enum definitions.
+std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
+                                              ComponentType red_component,
+                                              ComponentType green_component,
+                                              ComponentType blue_component,
+                                              ComponentType alpha_component) noexcept {
+    const auto format_index = static_cast<std::size_t>(format);
+    const auto red_index = static_cast<std::size_t>(red_component);
+    const auto green_index = static_cast<std::size_t>(green_component);
+    const auto blue_index = static_cast<std::size_t>(blue_component);
+    const auto alpha_index = static_cast<std::size_t>(alpha_component);
+    const std::size_t srgb_index = is_srgb ? 1 : 0;
+
+    return format_index * PerFormat + srgb_index * PerComponents4 +
+           alpha_index * PerComponents3 + blue_index * PerComponents2 +
+           green_index * PerComponent + red_index;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
new file mode 100644
index 000000000..aa77e0a5a
--- /dev/null
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -0,0 +1,51 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <limits>
+#include "video_core/surface.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+// Flat lookup table translating a guest texture description (texture format, sRGB flag and
+// the component type of each of the four channels) into a VideoCore::Surface::PixelFormat.
+class FormatLookupTable {
+public:
+ explicit FormatLookupTable();
+
+ // Returns the pixel format stored for the given combination. The definition reports
+ // combinations whose slot holds PixelFormat::Invalid and returns a fallback format.
+ VideoCore::Surface::PixelFormat GetPixelFormat(
+ Tegra::Texture::TextureFormat format, bool is_srgb,
+ Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
+ Tegra::Texture::ComponentType blue_component,
+ Tegra::Texture::ComponentType alpha_component) const noexcept;
+
+private:
+ // Pixel formats are stored as u8 in `table`, so every format value has to fit in one.
+ static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
+
+ // Number of texture format slots reserved in the table.
+ static constexpr std::size_t NumTextureFormats = 128;
+
+ // Number of slots reserved per channel component type, and its powers for two, three and
+ // four channels.
+ static constexpr std::size_t PerComponent = 8;
+ static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
+ static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
+ static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
+ // Slots per texture format: all four-channel combinations, once per sRGB flag value.
+ static constexpr std::size_t PerFormat = PerComponents4 * 2;
+
+ // Maps a combination of format, sRGB flag and component types to its slot in `table`.
+ static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
+ Tegra::Texture::ComponentType red_component,
+ Tegra::Texture::ComponentType green_component,
+ Tegra::Texture::ComponentType blue_component,
+ Tegra::Texture::ComponentType alpha_component) noexcept;
+
+ // Setter counterpart of GetPixelFormat; see the definition for its behavior.
+ void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
+ Tegra::Texture::ComponentType red_component,
+ Tegra::Texture::ComponentType green_component,
+ Tegra::Texture::ComponentType blue_component,
+ Tegra::Texture::ComponentType alpha_component,
+ VideoCore::Surface::PixelFormat pixel_format);
+
+ // One u8-encoded PixelFormat per slot: NumTextureFormats * PerFormat = 1,048,576 entries.
+ std::array<u8, NumTextureFormats * PerFormat> table;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 683c49207..829268b4c 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/algorithm.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/microprofile.h"
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 5e497e49f..1bed82898 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -4,12 +4,11 @@
#pragma once
-#include <algorithm>
+#include <optional>
+#include <tuple>
#include <unordered_map>
#include <vector>
-#include "common/assert.h"
-#include "common/binary_find.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/morton.h"
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 1e4d3fb79..858e17e08 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -2,24 +2,23 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <map>
+#include <algorithm>
+#include <string>
+#include <tuple>
#include "common/alignment.h"
#include "common/bit_util.h"
#include "core/core.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/surface_params.h"
namespace VideoCommon {
-using VideoCore::Surface::ComponentTypeFromDepthFormat;
-using VideoCore::Surface::ComponentTypeFromRenderTarget;
-using VideoCore::Surface::ComponentTypeFromTexture;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
-using VideoCore::Surface::PixelFormatFromTextureFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceTargetFromTextureType;
using VideoCore::Surface::SurfaceType;
@@ -69,7 +68,8 @@ constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
} // Anonymous namespace
-SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& tic,
+SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
+ const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Sampler& entry) {
SurfaceParams params;
params.is_tiled = tic.IsTiled();
@@ -78,8 +78,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti
params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
- params.pixel_format =
- PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion);
+ params.pixel_format = lookup_table.GetPixelFormat(
+ tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
params.type = GetFormatType(params.pixel_format);
if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
switch (params.pixel_format) {
@@ -99,7 +99,6 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti
}
params.type = GetFormatType(params.pixel_format);
}
- params.component_type = ComponentTypeFromTexture(tic.r_type.Value());
params.type = GetFormatType(params.pixel_format);
// TODO: on 1DBuffer we should use the tic info.
if (tic.IsBuffer()) {
@@ -128,7 +127,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti
return params;
}
-SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic,
+SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
+ const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Image& entry) {
SurfaceParams params;
params.is_tiled = tic.IsTiled();
@@ -137,10 +137,9 @@ SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic,
params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
- params.pixel_format =
- PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion);
+ params.pixel_format = lookup_table.GetPixelFormat(
+ tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
params.type = GetFormatType(params.pixel_format);
- params.component_type = ComponentTypeFromTexture(tic.r_type.Value());
params.type = GetFormatType(params.pixel_format);
params.target = ImageTypeToSurfaceTarget(entry.GetType());
// TODO: on 1DBuffer we should use the tic info.
@@ -181,7 +180,6 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer(
params.block_depth = std::min(block_depth, 5U);
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromDepthFormat(format);
- params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.width = zeta_width;
params.height = zeta_height;
@@ -206,7 +204,6 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
params.block_depth = config.memory_layout.block_depth;
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
- params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
if (params.is_tiled) {
params.pitch = 0;
@@ -236,7 +233,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0,
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
- params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
params.height = config.height;
@@ -355,10 +351,10 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size
bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
- height, depth, pitch, num_levels, pixel_format, component_type, type, target) ==
+ height, depth, pitch, num_levels, pixel_format, type, target) ==
std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
- rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target);
+ rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
}
std::string SurfaceParams::TargetName() const {
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index c58e7f8a4..709aa0dc2 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -16,16 +16,20 @@
namespace VideoCommon {
+class FormatLookupTable;
+
using VideoCore::Surface::SurfaceCompression;
class SurfaceParams {
public:
/// Creates SurfaceCachedParams from a texture configuration.
- static SurfaceParams CreateForTexture(const Tegra::Texture::TICEntry& tic,
+ static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
+ const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Sampler& entry);
/// Creates SurfaceCachedParams from an image configuration.
- static SurfaceParams CreateForImage(const Tegra::Texture::TICEntry& tic,
+ static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
+ const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Image& entry);
/// Creates SurfaceCachedParams for a depth buffer configuration.
@@ -248,7 +252,6 @@ public:
u32 num_levels;
u32 emulated_levels;
VideoCore::Surface::PixelFormat pixel_format;
- VideoCore::Surface::ComponentType component_type;
VideoCore::Surface::SurfaceType type;
VideoCore::Surface::SurfaceTarget target;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 877c6635d..41309ebea 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -29,6 +29,7 @@
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/copy_params.h"
+#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/texture_cache/surface_view.h"
@@ -62,10 +63,10 @@ public:
}
}
- /***
- * `Guard` guarantees that rendertargets don't unregister themselves if the
+ /**
+ * Guarantees that rendertargets don't unregister themselves if they
* collide. Protection is currently only done on 3D slices.
- ***/
+ */
void GuardRenderTargets(bool new_guard) {
guard_render_targets = new_guard;
}
@@ -96,7 +97,7 @@ public:
if (!gpu_addr) {
return {};
}
- const auto params{SurfaceParams::CreateForTexture(tic, entry)};
+ const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
@@ -111,7 +112,7 @@ public:
if (!gpu_addr) {
return {};
}
- const auto params{SurfaceParams::CreateForImage(tic, entry)};
+ const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
@@ -224,8 +225,13 @@ public:
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Tegra::Engines::Fermi2D::Config& copy_config) {
std::lock_guard lock{mutex};
- std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
- std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
+ SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
+ SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
+ const GPUVAddr src_gpu_addr = src_config.Address();
+ const GPUVAddr dst_gpu_addr = dst_config.Address();
+ DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
+ std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false);
+ std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false);
ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
@@ -282,7 +288,7 @@ protected:
const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
// Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
- // and reading it from a sepparate buffer.
+ // and reading it from a separate buffer.
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) {
@@ -357,13 +363,37 @@ private:
BufferCopy = 3,
};
+ // Outcome of DeduceSurface.
+ enum class DeductionType : u32 {
+ // A single registered surface was found for the candidate.
+ DeductionComplete,
+ // No registered surface overlaps the candidate's region.
+ DeductionIncomplete,
+ // The candidate could not be resolved (no cache address, topology mismatch with the
+ // surface at that address, or more than one overlapping surface).
+ DeductionFailed,
+ };
+
+ // Result of DeduceSurface: the outcome category plus the deduced surface, if any.
+ struct Deduction {
+ // Defaults to a failed deduction.
+ DeductionType type{DeductionType::DeductionFailed};
+ // DeduceSurface only assigns this for DeductionComplete results; otherwise it stays
+ // default-constructed.
+ TSurface surface{};
+
+ // True when the deduction failed.
+ bool Failed() const {
+ return type == DeductionType::DeductionFailed;
+ }
+
+ // True when no registered surface was found for the candidate.
+ bool Incomplete() const {
+ return type == DeductionType::DeductionIncomplete;
+ }
+
+ // True when the deduced surface has a zeta (depth) pixel format.
+ // Dereferences `surface` unconditionally, so call it only on complete deductions.
+ bool IsDepth() const {
+ return surface->GetSurfaceParams().IsPixelFormatZeta();
+ }
+ };
+
/**
- * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
- * @param overlaps, the overlapping surfaces registered in the cache.
- * @param params, the paremeters on the new surface.
- * @param gpu_addr, the starting address of the new surface.
- * @param untopological, tells the recycler that the texture has no way to match the overlaps
- * due to topological reasons.
+ * Takes care of selecting a proper strategy to deal with a texture recycle.
+ *
+ * @param overlaps The overlapping surfaces registered in the cache.
+ * @param params The parameters on the new surface.
+ * @param gpu_addr The starting address of the new surface.
+ * @param untopological Indicates to the recycler that the texture has no way
+ * to match the overlaps due to topological reasons.
**/
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
@@ -374,7 +404,7 @@ private:
if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
- for (auto s : overlaps) {
+ for (const auto& s : overlaps) {
const auto& s_params = s->GetSurfaceParams();
if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
@@ -391,16 +421,19 @@ private:
}
/**
- * `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in
- *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the
- *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the
- *new surface from that data.
- * @param overlaps, the overlapping surfaces registered in the cache.
- * @param params, the paremeters on the new surface.
- * @param gpu_addr, the starting address of the new surface.
- * @param preserve_contents, tells if the new surface should be loaded from meory or left blank
- * @param untopological, tells the recycler that the texture has no way to match the overlaps
- * due to topological reasons.
+ * Used to decide what to do with textures we can't resolve in the cache. It has 2 implemented
+ * strategies: Ignore and Flush.
+ *
+ * - Ignore: Just unregisters all the overlaps and loads the new texture.
+ * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
+ *
+ * @param overlaps The overlapping surfaces registered in the cache.
+ * @param params The parameters for the new surface.
+ * @param gpu_addr The starting address of the new surface.
+ * @param preserve_contents Indicates that the new surface should be loaded from memory or left
+ * blank.
+ * @param untopological Indicates to the recycler that the texture has no way to match the
+ * overlaps due to topological reasons.
**/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const GPUVAddr gpu_addr,
@@ -437,10 +470,12 @@ private:
}
/**
- * `RebuildSurface` this method takes a single surface and recreates into another that
- * may differ in format, target or width alingment.
- * @param current_surface, the registered surface in the cache which we want to convert.
- * @param params, the new surface params which we'll use to recreate the surface.
+ * Takes a single surface and recreates into another that may differ in
+ * format, target or width alignment.
+ *
+ * @param current_surface The registered surface in the cache which we want to convert.
+ * @param params The new surface params which we'll use to recreate the surface.
+ * @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
bool is_render) {
@@ -451,15 +486,13 @@ private:
GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
SurfaceParams new_params = params;
new_params.pixel_format = cr_params.pixel_format;
- new_params.component_type = cr_params.component_type;
new_params.type = cr_params.type;
new_surface = GetUncachedSurface(gpu_addr, new_params);
} else {
new_surface = GetUncachedSurface(gpu_addr, params);
}
const auto& final_params = new_surface->GetSurfaceParams();
- if (cr_params.type != final_params.type ||
- (cr_params.component_type != final_params.component_type)) {
+ if (cr_params.type != final_params.type) {
BufferCopy(current_surface, new_surface);
} else {
std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
@@ -474,12 +507,14 @@ private:
}
/**
- * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's
- * params if it's an exact match, we return the main view of the registered surface. If it's
- * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats
+ * Takes a single surface and checks with the new surface's params if it's an exact
+ * match, we return the main view of the registered surface. If its formats don't
+ * match, we rebuild the surface. We call this last method a `Mirage`. If formats
* match but the targets don't, we create an overview View of the registered surface.
- * @param current_surface, the registered surface in the cache which we want to convert.
- * @param params, the new surface params which we want to check.
+ *
+ * @param current_surface The registered surface in the cache which we want to convert.
+ * @param params The new surface params which we want to check.
+ * @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
const SurfaceParams& params, bool is_render) {
@@ -501,13 +536,14 @@ private:
}
/**
- * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface
- * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps
- * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface
- * for them, else we return nothing.
- * @param overlaps, the overlapping surfaces registered in the cache.
- * @param params, the paremeters on the new surface.
- * @param gpu_addr, the starting address of the new surface.
+ * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
+ * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
+ * of the new surface, if they all match we end up recreating a surface for them,
+ * else we return nothing.
+ *
+ * @param overlaps The overlapping surfaces registered in the cache.
+ * @param params The parameters on the new surface.
+ * @param gpu_addr The starting address of the new surface.
**/
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params,
@@ -547,7 +583,7 @@ private:
} else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
return {};
}
- for (auto surface : overlaps) {
+ for (const auto& surface : overlaps) {
Unregister(surface);
}
new_surface->MarkAsModified(modified, Tick());
@@ -556,19 +592,27 @@ private:
}
/**
- * `GetSurface` gets the starting address and parameters of a candidate surface and tries
- * to find a matching surface within the cache. This is done in 3 big steps. The first is to
- * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
- * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from
- * memory else we move to step 3. Step 3 consists on figuring the relationship between the
- * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many
- * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the
- * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to
- * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface
- * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface.
- * @param gpu_addr, the starting address of the candidate surface.
- * @param params, the paremeters on the candidate surface.
- * @param preserve_contents, tells if the new surface should be loaded from meory or left blank.
+ * Gets the starting address and parameters of a candidate surface and tries
+ * to find a matching surface within the cache. This is done in 3 big steps:
+ *
+ * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
+ *
+ * 2. Check if there are any overlaps at all, if there are none, we just load the texture from
+ * memory else we move to step 3.
+ *
+ * 3. Consists of figuring out the relationship between the candidate texture and the
+ * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
+ * there's many, we just try to reconstruct a new surface out of them based on the
+ * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
+ * have to check if the candidate is a view (layer/mipmap) of the overlap or if the
+ * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
+ * a new surface.
+ *
+ * @param gpu_addr The starting address of the candidate surface.
+ * @param params The parameters on the candidate surface.
+ * @param preserve_contents Indicates that the new surface should be loaded from memory or
+ * left blank.
+ * @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
bool preserve_contents, bool is_render) {
@@ -623,7 +667,7 @@ private:
// Step 3
// Now we need to figure the relationship between the texture and its overlaps
// we do a topological test to ensure we can find some relationship. If it fails
- // inmediatly recycle the texture
+ // immediately recycle the texture
for (const auto& surface : overlaps) {
const auto topological_result = surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
@@ -691,6 +735,123 @@ private:
MatchTopologyResult::FullMatch);
}
+    /**
+     * Tries to deduce which registered surface corresponds to a candidate surface.
+     *
+     * The deduction is:
+     * - Failed when the address does not translate to a cache address, when the surface
+     *   registered at that exact address does not fully match the candidate's topology, or
+     *   when more than one registered surface overlaps the candidate's region.
+     * - Incomplete when no registered surface overlaps the candidate's region.
+     * - Complete, carrying the surface, when the surface registered at that exact address
+     *   matches the candidate's structure and target, or otherwise when exactly one
+     *   registered surface overlaps the candidate's region.
+     *
+     * @param gpu_addr The starting address of the candidate surface.
+     * @param params   The parameters on the candidate surface.
+     **/
+    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
+        // Small factory so every exit path builds its result the same way.
+        const auto make_deduction = [](DeductionType type, const TSurface& surface) {
+            Deduction deduction{};
+            deduction.type = type;
+            deduction.surface = surface;
+            return deduction;
+        };
+        const TSurface no_surface{};
+
+        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
+        const auto cache_addr{ToCacheAddr(host_ptr)};
+        if (!cache_addr) {
+            return make_deduction(DeductionType::DeductionFailed, no_surface);
+        }
+
+        // First look for a surface registered at exactly this address.
+        const auto exact_match = l1_cache.find(cache_addr);
+        if (exact_match != l1_cache.end()) {
+            TSurface& registered = exact_match->second;
+            if (registered->MatchesTopology(params) != MatchTopologyResult::FullMatch) {
+                return make_deduction(DeductionType::DeductionFailed, no_surface);
+            }
+            if (registered->MatchesStructure(params) != MatchStructureResult::None &&
+                registered->MatchTarget(params.target)) {
+                return make_deduction(DeductionType::DeductionComplete, registered);
+            }
+        }
+
+        // Otherwise fall back to whatever overlaps the candidate's memory region.
+        const std::size_t candidate_size = params.GetGuestSizeInBytes();
+        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
+        if (overlaps.empty()) {
+            return make_deduction(DeductionType::DeductionIncomplete, no_surface);
+        }
+        if (overlaps.size() > 1) {
+            return make_deduction(DeductionType::DeductionFailed, no_surface);
+        }
+        return make_deduction(DeductionType::DeductionComplete, overlaps[0]);
+    }
+
+    /**
+     * Takes the starting addresses and parameters of the source and destination of a blit
+     * and tries to deduce whether they are supposed to be depth textures. If so, the pixel
+     * format and surface type of both parameter sets are replaced with the ones of the
+     * deduced surfaces. The parameters are left untouched when either deduction fails, when
+     * neither surface is known to the cache, or when a known surface is not a depth surface.
+     *
+     * @param src_params   The parameters of the source surface (may be modified).
+     * @param dst_params   The parameters of the destination surface (may be modified).
+     * @param src_gpu_addr The starting address of the source surface.
+     * @param dst_gpu_addr The starting address of the destination surface.
+     **/
+    void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
+                        const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
+        auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
+        // Fix: the destination used to be deduced from the source address and parameters,
+        // so the destination surface was never actually inspected.
+        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
+        if (deduced_src.Failed() || deduced_dst.Failed()) {
+            return;
+        }
+
+        const bool incomplete_src = deduced_src.Incomplete();
+        const bool incomplete_dst = deduced_dst.Incomplete();
+
+        // Nothing is known about either surface, so there is nothing to inherit from.
+        if (incomplete_src && incomplete_dst) {
+            return;
+        }
+
+        const bool any_incomplete = incomplete_src || incomplete_dst;
+
+        if (!any_incomplete) {
+            // Both surfaces are known: only act when both of them are depth surfaces.
+            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
+                return;
+            }
+        } else {
+            // Exactly one surface is known: only act when that one is a depth surface.
+            // IsDepth is only queried on the complete deduction, whose surface is set.
+            if (incomplete_src && !(deduced_dst.IsDepth())) {
+                return;
+            }
+
+            if (incomplete_dst && !(deduced_src.IsDepth())) {
+                return;
+            }
+        }
+
+        // Taken by reference to avoid copying the surface handle on every call.
+        const auto inherit_format = [](SurfaceParams& to, const TSurface& from) {
+            const SurfaceParams& params = from->GetSurfaceParams();
+            to.pixel_format = params.pixel_format;
+            to.type = params.type;
+        };
+        // Now we got the cases where one or both is Depth and the other is not known.
+        // A known surface keeps its own format; an unknown one inherits from the known one.
+        if (!incomplete_src) {
+            inherit_format(src_params, deduced_src.surface);
+        } else {
+            inherit_format(src_params, deduced_dst.surface);
+        }
+        if (!incomplete_dst) {
+            inherit_format(dst_params, deduced_dst.surface);
+        } else {
+            inherit_format(dst_params, deduced_src.surface);
+        }
+    }
+
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
bool preserve_contents) {
auto new_surface{GetUncachedSurface(gpu_addr, params)};
@@ -793,6 +954,8 @@ private:
VideoCore::RasterizerInterface& rasterizer;
+ FormatLookupTable format_lookup_table;
+
u64 ticks{};
// Guards the cache for protection conflicts.
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index a9b8f69af..33bd31865 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -92,11 +92,11 @@ private:
const unsigned int mask = 1 << m_NextBit++;
// clear the bit
- *m_CurByte &= ~mask;
+ *m_CurByte &= static_cast<unsigned char>(~mask);
// Write the bit, if necessary
if (b)
- *m_CurByte |= mask;
+ *m_CurByte |= static_cast<unsigned char>(mask);
// Next byte?
if (m_NextBit >= 8) {
@@ -137,7 +137,7 @@ public:
}
uint64_t mask = (1 << (end - start + 1)) - 1;
- return (m_Bits >> start) & mask;
+ return (m_Bits >> start) & static_cast<IntType>(mask);
}
private:
@@ -422,7 +422,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
TexelWeightParams params;
// Read the entire block mode all at once
- uint16_t modeBits = strm.ReadBits(11);
+ uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11));
// Does this match the void extent block mode?
if ((modeBits & 0x01FF) == 0x1FC) {
@@ -625,10 +625,10 @@ static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint
}
// Decode the RGBA components and renormalize them to the range [0, 255]
- uint16_t r = strm.ReadBits(16);
- uint16_t g = strm.ReadBits(16);
- uint16_t b = strm.ReadBits(16);
- uint16_t a = strm.ReadBits(16);
+ uint16_t r = static_cast<uint16_t>(strm.ReadBits(16));
+ uint16_t g = static_cast<uint16_t>(strm.ReadBits(16));
+ uint16_t b = static_cast<uint16_t>(strm.ReadBits(16));
+ uint16_t a = static_cast<uint16_t>(strm.ReadBits(16));
uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 |
(static_cast<uint32_t>(a) & 0xFF00) << 16;
@@ -656,7 +656,7 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
return 0;
if (toBit == 0)
return 0;
- IntType v = val & ((1 << numBits) - 1);
+ IntType v = val & static_cast<IntType>((1 << numBits) - 1);
IntType res = v;
uint32_t reslen = numBits;
while (reslen < toBit) {
@@ -666,8 +666,8 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
comp = numBits - newshift;
numBits = newshift;
}
- res <<= numBits;
- res |= v >> comp;
+ res = static_cast<IntType>(res << numBits);
+ res = static_cast<IntType>(res | (v >> comp));
reslen += numBits;
}
return res;
@@ -681,9 +681,10 @@ protected:
public:
Pixel() = default;
- Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8)
+ Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8)
: m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)},
- color{a, r, g, b} {}
+ color{static_cast<ChannelType>(a), static_cast<ChannelType>(r),
+ static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {}
// Changes the depth of each pixel. This scales the values to
// the appropriate bit depth by either truncating the least
@@ -713,7 +714,7 @@ public:
// Do nothing
return val;
} else if (oldDepth == 0 && newDepth != 0) {
- return (1 << newDepth) - 1;
+ return static_cast<ChannelType>((1 << newDepth) - 1);
} else if (newDepth > oldDepth) {
return Replicate(val, oldDepth, newDepth);
} else {
@@ -721,10 +722,11 @@ public:
if (newDepth == 0) {
return 0xFF;
} else {
- uint8_t bitsWasted = oldDepth - newDepth;
+ uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth);
uint16_t v = static_cast<uint16_t>(val);
- v = (v + (1 << (bitsWasted - 1))) >> bitsWasted;
- v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1);
+ v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
+ v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v),
+ static_cast<uint16_t>((1 << newDepth) - 1));
return static_cast<uint8_t>(v);
}
}
@@ -1190,18 +1192,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF);
uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF);
- seed1 *= seed1;
- seed2 *= seed2;
- seed3 *= seed3;
- seed4 *= seed4;
- seed5 *= seed5;
- seed6 *= seed6;
- seed7 *= seed7;
- seed8 *= seed8;
- seed9 *= seed9;
- seed10 *= seed10;
- seed11 *= seed11;
- seed12 *= seed12;
+ seed1 = static_cast<uint8_t>(seed1 * seed1);
+ seed2 = static_cast<uint8_t>(seed2 * seed2);
+ seed3 = static_cast<uint8_t>(seed3 * seed3);
+ seed4 = static_cast<uint8_t>(seed4 * seed4);
+ seed5 = static_cast<uint8_t>(seed5 * seed5);
+ seed6 = static_cast<uint8_t>(seed6 * seed6);
+ seed7 = static_cast<uint8_t>(seed7 * seed7);
+ seed8 = static_cast<uint8_t>(seed8 * seed8);
+ seed9 = static_cast<uint8_t>(seed9 * seed9);
+ seed10 = static_cast<uint8_t>(seed10 * seed10);
+ seed11 = static_cast<uint8_t>(seed11 * seed11);
+ seed12 = static_cast<uint8_t>(seed12 * seed12);
int32_t sh1, sh2, sh3;
if (seed & 1) {
@@ -1213,18 +1215,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
}
sh3 = (seed & 0x10) ? sh1 : sh2;
- seed1 >>= sh1;
- seed2 >>= sh2;
- seed3 >>= sh1;
- seed4 >>= sh2;
- seed5 >>= sh1;
- seed6 >>= sh2;
- seed7 >>= sh1;
- seed8 >>= sh2;
- seed9 >>= sh3;
- seed10 >>= sh3;
- seed11 >>= sh3;
- seed12 >>= sh3;
+ seed1 = static_cast<uint8_t>(seed1 >> sh1);
+ seed2 = static_cast<uint8_t>(seed2 >> sh2);
+ seed3 = static_cast<uint8_t>(seed3 >> sh1);
+ seed4 = static_cast<uint8_t>(seed4 >> sh2);
+ seed5 = static_cast<uint8_t>(seed5 >> sh1);
+ seed6 = static_cast<uint8_t>(seed6 >> sh2);
+ seed7 = static_cast<uint8_t>(seed7 >> sh1);
+ seed8 = static_cast<uint8_t>(seed8 >> sh2);
+ seed9 = static_cast<uint8_t>(seed9 >> sh3);
+ seed10 = static_cast<uint8_t>(seed10 >> sh3);
+ seed11 = static_cast<uint8_t>(seed11 >> sh3);
+ seed12 = static_cast<uint8_t>(seed12 >> sh3);
int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
@@ -1557,7 +1559,9 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
// Make sure that higher non-texel bits are set to zero
const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
- texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1;
+ texelWeightData[clearByteStart - 1] =
+ texelWeightData[clearByteStart - 1] &
+ static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
std::vector<IntegerEncodedValue> texelWeightValues;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e36bc2c04..8e82c6748 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -132,6 +132,8 @@ enum class SwizzleSource : u32 {
};
union TextureHandle {
+ TextureHandle(u32 raw) : raw{raw} {}
+
u32 raw;
BitField<0, 20, u32> tic_id;
BitField<20, 12, u32> tsc_id;
@@ -340,13 +342,14 @@ struct TSCEntry {
float GetLodBias() const {
// Sign extend the 13-bit value.
constexpr u32 mask = 1U << (13 - 1);
- return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
+ return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
}
std::array<float, 4> GetBorderColor() const {
if (srgb_conversion) {
- return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f,
- srgb_border_color_b / 255.0f, border_color[3]};
+ return {static_cast<float>(srgb_border_color_r) / 255.0f,
+ static_cast<float>(srgb_border_color_g) / 255.0f,
+ static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
}
return border_color;
}
@@ -354,7 +357,6 @@ struct TSCEntry {
static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
struct FullTextureInfo {
- u32 index;
TICEntry tic;
TSCEntry tsc;
};
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 60cda0ca3..8e947394c 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -28,7 +28,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {
u16 GetResolutionScaleFactor(const RendererBase& renderer) {
return static_cast<u16>(
- Settings::values.resolution_factor
+ Settings::values.resolution_factor != 0
? Settings::values.resolution_factor
: renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
}