aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h5
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h4
-rw-r--r--src/video_core/engines/kepler_compute.cpp8
-rw-r--r--src/video_core/engines/kepler_compute.h4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp8
-rw-r--r--src/video_core/engines/maxwell_3d.h4
-rw-r--r--src/video_core/engines/shader_bytecode.h9
-rw-r--r--src/video_core/guest_driver.cpp36
-rw-r--r--src/video_core/guest_driver.h41
-rw-r--r--src/video_core/rasterizer_interface.h14
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp161
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp37
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h1
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp265
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp19
-rw-r--r--src/video_core/shader/ast.h10
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp17
-rw-r--r--src/video_core/shader/const_buffer_locker.h21
-rw-r--r--src/video_core/shader/decode.cpp68
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp11
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp2
-rw-r--r--src/video_core/shader/decode/bfi.cpp7
-rw-r--r--src/video_core/shader/decode/memory.cpp85
-rw-r--r--src/video_core/shader/decode/other.cpp9
-rw-r--r--src/video_core/shader/decode/texture.cpp114
-rw-r--r--src/video_core/shader/node.h89
-rw-r--r--src/video_core/shader/node_helper.h6
-rw-r--r--src/video_core/shader/shader_ir.cpp9
-rw-r--r--src/video_core/shader/shader_ir.h16
-rw-r--r--src/video_core/shader/track.cpp106
-rw-r--r--src/video_core/video_core.cpp15
37 files changed, 1025 insertions, 217 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index ccfed4f2e..db9332d00 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,6 +29,8 @@ add_library(video_core STATIC
gpu_synch.h
gpu_thread.cpp
gpu_thread.h
+ guest_driver.cpp
+ guest_driver.h
macro_interpreter.cpp
macro_interpreter.h
memory_manager.cpp
@@ -154,6 +156,7 @@ if (ENABLE_VULKAN)
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/renderer_vulkan.h
+ renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/vk_blit_screen.cpp
renderer_vulkan/vk_blit_screen.h
renderer_vulkan/vk_buffer_cache.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 0510ed777..186aca61d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -101,7 +101,10 @@ public:
void TickFrame() {
++epoch;
while (!pending_destruction.empty()) {
- if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
+ // Delay at least 4 frames before destruction.
+ // This is due to triple buffering happening on some drivers.
+ static constexpr u64 epochs_to_destroy = 5;
+ if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
break;
}
pending_destruction.pop_front();
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index 44b8b8d22..d56a47710 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
@@ -106,6 +107,9 @@ public:
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0;
virtual u32 GetBoundBuffer() const = 0;
+
+ virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
+ virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
};
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 110406f2f..4b824aa4e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
return result;
}
+VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
+ return rasterizer.AccessGuestDriverProfile();
+}
+
+const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
+ return rasterizer.AccessGuestDriverProfile();
+}
+
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 4ef3e0613..eeb79c56f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -218,6 +218,10 @@ public:
return regs.tex_cb_index;
}
+ VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
+
+ const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
+
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 58dfa8033..7cea146f0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
return result;
}
+VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
+ return rasterizer.AccessGuestDriverProfile();
+}
+
+const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
+ return rasterizer.AccessGuestDriverProfile();
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ee79260fc..8808bbf76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1306,6 +1306,10 @@ public:
return regs.tex_cb_index;
}
+ VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
+
+ const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
+
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.
using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 901adb97b..402869fde 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1144,6 +1144,11 @@ union Instruction {
} fset;
union {
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, PredCondition> cond;
+ } fcmp;
+
+ union {
BitField<49, 1, u64> bf;
BitField<35, 3, PredCondition> cond;
BitField<50, 1, u64> ftz;
@@ -1723,6 +1728,7 @@ public:
BFE_C,
BFE_R,
BFE_IMM,
+ BFI_RC,
BFI_IMM_R,
BRA,
BRX,
@@ -1820,6 +1826,7 @@ public:
ICMP_R,
ICMP_CR,
ICMP_IMM,
+ FCMP_R,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -2124,6 +2131,7 @@ private:
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
+ INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -2148,6 +2156,7 @@ private:
INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
+ INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
new file mode 100644
index 000000000..6adef459e
--- /dev/null
+++ b/src/video_core/guest_driver.cpp
@@ -0,0 +1,36 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <limits>
+
+#include "video_core/guest_driver.h"
+
+namespace VideoCore {
+
+void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
+ if (texture_handler_size_deduced) {
+ return;
+ }
+ const std::size_t size = bound_offsets.size();
+ if (size < 2) {
+ return;
+ }
+ std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
+ u32 min_val = std::numeric_limits<u32>::max();
+ for (std::size_t i = 1; i < size; ++i) {
+ if (bound_offsets[i] == bound_offsets[i - 1]) {
+ continue;
+ }
+ const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
+ min_val = std::min(min_val, new_min);
+ }
+ if (min_val > 2) {
+ return;
+ }
+ texture_handler_size_deduced = true;
+ texture_handler_size = min_texture_handler_size * min_val;
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
new file mode 100644
index 000000000..fc1917347
--- /dev/null
+++ b/src/video_core/guest_driver.h
@@ -0,0 +1,41 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace VideoCore {
+
+/**
+ * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
+ * information necessary for impossible to avoid HLE methods like shader tracks as they are
+ * Entscheidungsproblems.
+ */
+class GuestDriverProfile {
+public:
+ void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
+
+ u32 GetTextureHandlerSize() const {
+ return texture_handler_size;
+ }
+
+ bool TextureHandlerSizeKnown() const {
+ return texture_handler_size_deduced;
+ }
+
+private:
+ // Minimum size of texture handler any driver can use.
+ static constexpr u32 min_texture_handler_size = 4;
+ // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
+ // use 4 bytes instead. Thus, certain drivers may squish the size.
+ static constexpr u32 default_texture_handler_size = 8;
+
+ u32 texture_handler_size = default_texture_handler_size;
+ bool texture_handler_size_deduced = false;
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5b0eca9e2..c586cd6fe 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
+#include "video_core/guest_driver.h"
namespace Tegra {
class MemoryManager;
@@ -78,5 +79,18 @@ public:
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
const DiskResourceLoadCallback& callback = {}) {}
+
+ /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
+ GuestDriverProfile& AccessGuestDriverProfile() {
+ return guest_driver_profile;
+ }
+
+ /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
+ const GuestDriverProfile& AccessGuestDriverProfile() const {
+ return guest_driver_profile;
+ }
+
+private:
+ GuestDriverProfile guest_driver_profile{};
};
} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c428f06e4..46a7433ea 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -55,16 +55,20 @@ namespace {
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
- Tegra::Engines::ShaderType shader_type) {
+ Tegra::Engines::ShaderType shader_type,
+ std::size_t index = 0) {
if (entry.IsBindless()) {
const Tegra::Texture::TextureHandle tex_handle =
engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
return engine.GetTextureInfo(tex_handle);
}
+ const auto& gpu_profile = engine.AccessGuestDriverProfile();
+ const u32 offset =
+ entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
- return engine.GetStageTexture(shader_type, entry.GetOffset());
+ return engine.GetStageTexture(shader_type, offset);
} else {
- return engine.GetTexture(entry.GetOffset());
+ return engine.GetTexture(offset);
}
}
@@ -244,9 +248,6 @@ void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
}
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
- if (accelerate_draw != AccelDraw::Indexed) {
- return 0;
- }
MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize();
@@ -542,7 +543,8 @@ void RasterizerOpenGL::Clear() {
}
}
-void RasterizerOpenGL::DrawPrelude() {
+void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
SyncRasterizeEnable(state);
@@ -563,9 +565,6 @@ void RasterizerOpenGL::DrawPrelude() {
buffer_cache.Acquire();
- // Draw the vertex batch
- const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-
std::size_t buffer_size = CalculateVertexArraysSize();
// Add space for index buffer
@@ -592,7 +591,11 @@ void RasterizerOpenGL::DrawPrelude() {
// Upload vertex and index data.
SetupVertexBuffer(vao);
SetupVertexInstances(vao);
- index_buffer_offset = SetupIndexBuffer();
+
+ GLintptr index_buffer_offset;
+ if (is_indexed) {
+ index_buffer_offset = SetupIndexBuffer();
+ }
// Prepare packed bindings.
bind_ubo_pushbuffer.Setup();
@@ -626,6 +629,7 @@ void RasterizerOpenGL::DrawPrelude() {
// As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty.ResetVertexArrays();
}
+ gpu.dirty.memory_general = false;
shader_program_manager->ApplyTo(state);
state.Apply();
@@ -633,106 +637,33 @@ void RasterizerOpenGL::DrawPrelude() {
if (texture_cache.TextureBarrier()) {
glTextureBarrier();
}
-}
-
-struct DrawParams {
- bool is_indexed{};
- bool is_instanced{};
- GLenum primitive_mode{};
- GLint count{};
- GLint base_vertex{};
-
- // Indexed settings
- GLenum index_format{};
- GLintptr index_buffer_offset{};
-
- // Instanced setting
- GLint num_instances{};
- GLint base_instance{};
-
- void DispatchDraw() {
- if (is_indexed) {
- const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
- if (is_instanced) {
- glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
- index_buffer_ptr, num_instances,
- base_vertex, base_instance);
- } else {
- glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
- base_vertex);
- }
- } else {
- if (is_instanced) {
- glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances,
- base_instance);
- } else {
- glDrawArrays(primitive_mode, base_vertex, count);
- }
- }
- }
-};
-
-bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
- accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
-
- MICROPROFILE_SCOPE(OpenGL_Drawing);
-
- DrawPrelude();
- auto& maxwell3d = system.GPU().Maxwell3D();
- const auto& regs = maxwell3d.regs;
- const auto current_instance = maxwell3d.state.current_instance;
- DrawParams draw_call{};
- draw_call.is_indexed = is_indexed;
- draw_call.num_instances = static_cast<GLint>(1);
- draw_call.base_instance = static_cast<GLint>(current_instance);
- draw_call.is_instanced = current_instance > 0;
- draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
- if (draw_call.is_indexed) {
- draw_call.count = static_cast<GLint>(regs.index_array.count);
- draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
- draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
- draw_call.index_buffer_offset = index_buffer_offset;
+ const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+ const GLsizei num_instances =
+ static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+ if (is_indexed) {
+ const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+ const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
+ const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
+ glDrawElementsInstancedBaseVertexBaseInstance(
+ primitive_mode, num_vertices, index_format,
+ reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex,
+ base_instance);
} else {
- draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
- draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
+ const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
+ const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+ glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances,
+ base_instance);
}
- draw_call.DispatchDraw();
+}
- maxwell3d.dirty.memory_general = false;
- accelerate_draw = AccelDraw::Disabled;
+bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
+ Draw(is_indexed, false);
return true;
}
bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
- accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
-
- MICROPROFILE_SCOPE(OpenGL_Drawing);
-
- DrawPrelude();
-
- auto& maxwell3d = system.GPU().Maxwell3D();
- const auto& regs = maxwell3d.regs;
- const auto& draw_setup = maxwell3d.mme_draw;
- DrawParams draw_call{};
- draw_call.is_indexed = is_indexed;
- draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count);
- draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance);
- draw_call.is_instanced = draw_setup.instance_count > 1;
- draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
- if (draw_call.is_indexed) {
- draw_call.count = static_cast<GLint>(regs.index_array.count);
- draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
- draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
- draw_call.index_buffer_offset = index_buffer_offset;
- } else {
- draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
- draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
- }
- draw_call.DispatchDraw();
-
- maxwell3d.dirty.memory_general = false;
- accelerate_draw = AccelDraw::Disabled;
+ Draw(is_indexed, true);
return true;
}
@@ -942,8 +873,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetShaderEntries().samplers) {
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
- const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
- SetupTexture(binding++, texture, entry);
+ if (!entry.IsIndexed()) {
+ const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
+ SetupTexture(binding++, texture, entry);
+ } else {
+ for (std::size_t i = 0; i < entry.Size(); ++i) {
+ const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
+ SetupTexture(binding++, texture, entry);
+ }
+ }
}
}
@@ -952,8 +890,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().samplers) {
- const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
- SetupTexture(binding++, texture, entry);
+ if (!entry.IsIndexed()) {
+ const auto texture =
+ GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
+ SetupTexture(binding++, texture, entry);
+ } else {
+ for (std::size_t i = 0; i < entry.Size(); ++i) {
+ const auto texture =
+ GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
+ SetupTexture(binding++, texture, entry);
+ }
+ }
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 6a27cf497..0501f3828 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -103,7 +103,7 @@ private:
std::size_t size);
/// Syncs all the state, shaders, render targets and textures setting before a draw call.
- void DrawPrelude();
+ void Draw(bool is_indexed, bool is_instanced);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
@@ -220,12 +220,7 @@ private:
GLintptr SetupIndexBuffer();
- GLintptr index_buffer_offset;
-
void SetupShaders(GLenum primitive_mode);
-
- enum class AccelDraw { Disabled, Arrays, Indexed };
- AccelDraw accelerate_draw = AccelDraw::Disabled;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3c5bdd377..489eb143c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s
}
void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
+ locker.SetBoundBuffer(usage.bound_buffer);
for (const auto& key : usage.keys) {
const auto [buffer, offset] = key.first;
locker.InsertKey(buffer, offset, key.second);
@@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() {
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
const ConstBufferLocker& locker) const {
- return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(),
+ return ShaderDiskCacheUsage{unique_identifier, variant,
+ locker.GetBoundBuffer(), locker.GetKeys(),
locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a1ac3d7a9..4735000b5 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -391,6 +391,7 @@ public:
DeclareVertex();
DeclareGeometry();
DeclareRegisters();
+ DeclareCustomVariables();
DeclarePredicates();
DeclareLocalMemory();
DeclareInternalFlags();
@@ -503,6 +504,16 @@ private:
}
}
+ void DeclareCustomVariables() {
+ const u32 num_custom_variables = ir.GetNumCustomVariables();
+ for (u32 i = 0; i < num_custom_variables; ++i) {
+ code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
+ }
+ if (num_custom_variables > 0) {
+ code.AddNewLine();
+ }
+ }
+
void DeclarePredicates() {
const auto& predicates = ir.GetPredicates();
for (const auto pred : predicates) {
@@ -655,7 +666,8 @@ private:
u32 binding = device.GetBaseBindings(stage).sampler;
for (const auto& sampler : ir.GetSamplers()) {
const std::string name = GetSampler(sampler);
- const std::string description = fmt::format("layout (binding = {}) uniform", binding++);
+ const std::string description = fmt::format("layout (binding = {}) uniform", binding);
+ binding += sampler.IsIndexed() ? sampler.Size() : 1;
std::string sampler_type = [&]() {
if (sampler.IsBuffer()) {
@@ -682,7 +694,11 @@ private:
sampler_type += "Shadow";
}
- code.AddLine("{} {} {};", description, sampler_type, name);
+ if (!sampler.IsIndexed()) {
+ code.AddLine("{} {} {};", description, sampler_type, name);
+ } else {
+ code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size());
+ }
}
if (!ir.GetSamplers().empty()) {
code.AddNewLine();
@@ -775,6 +791,11 @@ private:
return {GetRegister(index), Type::Float};
}
+ if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+ const u32 index = cv->GetIndex();
+ return {GetCustomVariable(index), Type::Float};
+ }
+
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
@@ -1098,7 +1119,11 @@ private:
} else if (!meta->ptp.empty()) {
expr += "Offsets";
}
- expr += '(' + GetSampler(meta->sampler) + ", ";
+ if (!meta->sampler.IsIndexed()) {
+ expr += '(' + GetSampler(meta->sampler) + ", ";
+ } else {
+ expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
+ }
expr += coord_constructors.at(count + (has_array ? 1 : 0) +
(has_shadow && !separate_dc ? 1 : 0) - 1);
expr += '(';
@@ -1310,6 +1335,8 @@ private:
const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
Type::Uint};
+ } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
+ target = {GetCustomVariable(cv->GetIndex()), Type::Float};
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
@@ -2237,6 +2264,10 @@ private:
return GetDeclarationWithSuffix(index, "gpr");
}
+ std::string GetCustomVariable(u32 index) const {
+ return GetDeclarationWithSuffix(index, "custom_var");
+ }
+
std::string GetPredicate(Tegra::Shader::Pred pred) const {
return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index cf874a09a..1fc204f6f 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -53,7 +53,7 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler{};
};
-constexpr u32 NativeVersion = 11;
+constexpr u32 NativeVersion = 12;
// Making sure sizes doesn't change by accident
static_assert(sizeof(ProgramVariant) == 20);
@@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
u32 num_bound_samplers{};
u32 num_bindless_samplers{};
if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
- file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
+ file.ReadArray(&usage.variant, 1) != 1 ||
+ file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
LOG_ERROR(Render_OpenGL, error_loading);
@@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
u32 num_bindless_samplers{};
ShaderDiskCacheUsage usage;
if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
- !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
+ !LoadObjectFromPrecompiled(usage.variant) ||
+ !LoadObjectFromPrecompiled(usage.bound_buffer) ||
+ !LoadObjectFromPrecompiled(num_keys) ||
!LoadObjectFromPrecompiled(num_bound_samplers) ||
!LoadObjectFromPrecompiled(num_bindless_samplers)) {
return {};
@@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
+ file.WriteObject(usage.bound_buffer) != 1 ||
file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
@@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
};
if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
- !SaveObjectToPrecompiled(usage.variant) ||
+ !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 69a2fbdda..ef2371f6d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>);
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
ProgramVariant variant;
+ u32 bound_buffer{};
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
new file mode 100644
index 000000000..d5032b432
--- /dev/null
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -0,0 +1,265 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/telemetry.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/frontend/emu_window.h"
+#include "core/memory.h"
+#include "core/perf_stats.h"
+#include "core/settings.h"
+#include "core/telemetry_session.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+
+namespace Vulkan {
+
+namespace {
+
+VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
+ VkDebugUtilsMessageTypeFlagsEXT type,
+ const VkDebugUtilsMessengerCallbackDataEXT* data,
+ [[maybe_unused]] void* user_data) {
+ const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_};
+ const char* message{data->pMessage};
+
+ if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
+ LOG_CRITICAL(Render_Vulkan, "{}", message);
+ } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) {
+ LOG_WARNING(Render_Vulkan, "{}", message);
+ } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) {
+ LOG_INFO(Render_Vulkan, "{}", message);
+ } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) {
+ LOG_DEBUG(Render_Vulkan, "{}", message);
+ }
+ return VK_FALSE;
+}
+
+std::string GetReadableVersion(u32 version) {
+ return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
+ VK_VERSION_PATCH(version));
+}
+
+std::string GetDriverVersion(const VKDevice& device) {
+ // Extracted from
+ // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
+ const u32 version = device.GetDriverVersion();
+
+ if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
+ const u32 major = (version >> 22) & 0x3ff;
+ const u32 minor = (version >> 14) & 0x0ff;
+ const u32 secondary = (version >> 6) & 0x0ff;
+ const u32 tertiary = version & 0x003f;
+ return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
+ }
+ if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) {
+ const u32 major = version >> 14;
+ const u32 minor = version & 0x3fff;
+ return fmt::format("{}.{}", major, minor);
+ }
+
+ return GetReadableVersion(version);
+}
+
+std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
+ std::sort(std::begin(available_extensions), std::end(available_extensions));
+
+ static constexpr std::size_t AverageExtensionSize = 64;
+ std::string separated_extensions;
+ separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
+
+ const auto end = std::end(available_extensions);
+ for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
+ if (const bool is_last = extension + 1 == end; is_last) {
+ separated_extensions += *extension;
+ } else {
+ separated_extensions += fmt::format("{},", *extension);
+ }
+ }
+ return separated_extensions;
+}
+
+} // Anonymous namespace
+
+RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
+ : RendererBase(window), system{system} {}
+
+RendererVulkan::~RendererVulkan() {
+ ShutDown();
+}
+
+void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ const auto& layout = render_window.GetFramebufferLayout();
+ if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
+ const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
+ const bool use_accelerated =
+ rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
+ const bool is_srgb = use_accelerated && screen_info.is_srgb;
+ if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
+ swapchain->Create(layout.width, layout.height, is_srgb);
+ blit_screen->Recreate();
+ }
+
+ scheduler->WaitWorker();
+
+ swapchain->AcquireNextImage();
+ const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated);
+
+ scheduler->Flush(false, render_semaphore);
+
+ if (swapchain->Present(render_semaphore, fence)) {
+ blit_screen->Recreate();
+ }
+
+ render_window.SwapBuffers();
+ rasterizer->TickFrame();
+ }
+
+ render_window.PollEvents();
+}
+
+bool RendererVulkan::Init() {
+ PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
+ render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
+ const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr);
+
+ std::optional<vk::DebugUtilsMessengerEXT> callback;
+ if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
+ callback = CreateDebugCallback(dldi);
+ if (!callback) {
+ return false;
+ }
+ }
+
+ if (!PickDevices(dldi)) {
+ if (callback) {
+ instance.destroy(*callback, nullptr, dldi);
+ }
+ return false;
+ }
+ debug_callback = UniqueDebugUtilsMessengerEXT(
+ *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
+ instance, nullptr, device->GetDispatchLoader()));
+
+ Report();
+
+ memory_manager = std::make_unique<VKMemoryManager>(*device);
+
+ resource_manager = std::make_unique<VKResourceManager>(*device);
+
+ const auto& framebuffer = render_window.GetFramebufferLayout();
+ swapchain = std::make_unique<VKSwapchain>(surface, *device);
+ swapchain->Create(framebuffer.width, framebuffer.height, false);
+
+ scheduler = std::make_unique<VKScheduler>(*device, *resource_manager);
+
+ rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
+ *resource_manager, *memory_manager, *scheduler);
+
+ blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
+ *resource_manager, *memory_manager, *swapchain,
+ *scheduler, screen_info);
+
+ return true;
+}
+
+void RendererVulkan::ShutDown() {
+ if (!device) {
+ return;
+ }
+ const auto dev = device->GetLogical();
+ const auto& dld = device->GetDispatchLoader();
+ if (dev && dld.vkDeviceWaitIdle) {
+ dev.waitIdle(dld);
+ }
+
+ rasterizer.reset();
+ blit_screen.reset();
+ scheduler.reset();
+ swapchain.reset();
+ memory_manager.reset();
+ resource_manager.reset();
+ device.reset();
+}
+
+std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
+ const vk::DispatchLoaderDynamic& dldi) {
+ const vk::DebugUtilsMessengerCreateInfoEXT callback_ci(
+ {},
+ vk::DebugUtilsMessageSeverityFlagBitsEXT::eError |
+ vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning |
+ vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo |
+ vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose,
+ vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral |
+ vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
+ vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
+ &DebugCallback, nullptr);
+ vk::DebugUtilsMessengerEXT callback;
+ if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) !=
+ vk::Result::eSuccess) {
+ LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
+ return {};
+ }
+ return callback;
+}
+
+bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) {
+ const auto devices = instance.enumeratePhysicalDevices(dldi);
+
+ // TODO(Rodrigo): Choose device from config file
+ const s32 device_index = Settings::values.vulkan_device;
+ if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
+ LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
+ return false;
+ }
+ const vk::PhysicalDevice physical_device = devices[device_index];
+
+ if (!VKDevice::IsSuitable(dldi, physical_device, surface)) {
+ return false;
+ }
+
+ device = std::make_unique<VKDevice>(dldi, physical_device, surface);
+ return device->Create(dldi, instance);
+}
+
+void RendererVulkan::Report() const {
+ const std::string vendor_name{device->GetVendorName()};
+ const std::string model_name{device->GetModelName()};
+ const std::string driver_version = GetDriverVersion(*device);
+ const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
+
+ const std::string api_version = GetReadableVersion(device->GetApiVersion());
+
+ const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
+
+ LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
+ LOG_INFO(Render_Vulkan, "Device: {}", model_name);
+ LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
+
+ auto& telemetry_session = system.TelemetrySession();
+ constexpr auto field = Telemetry::FieldType::UserSystem;
+ telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
+ telemetry_session.AddField(field, "GPU_Model", model_name);
+ telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
+ telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version);
+ telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
+}
+
+} // namespace Vulkan \ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 939eebe83..9840f26e5 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -400,8 +400,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
false);
- Test(extension, nv_device_diagnostic_checkpoints,
- VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
+ if (Settings::values.renderer_debug) {
+ Test(extension, nv_device_diagnostic_checkpoints,
+ VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
+ }
}
if (khr_shader_float16_int8) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index d2c6b1189..aada38702 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -571,7 +571,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
- texceptions.set(rt);
+ texceptions[rt] = true;
}
}
@@ -579,7 +579,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
zeta_attachment = texture_cache.GetDepthBufferSurface(true);
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
- texceptions.set(ZETA_TEXCEPTION_INDEX);
+ texceptions[ZETA_TEXCEPTION_INDEX] = true;
}
texture_cache.GuardRenderTargets(false);
@@ -1122,11 +1122,12 @@ RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions)
for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
const auto& rendertarget = regs.rt[rt];
- if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE)
+ if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
continue;
+ }
renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
- texceptions.test(rt)});
+ texceptions[rt]});
}
renderpass_params.has_zeta = regs.zeta_enable;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 1ab22251e..24a658dce 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -353,6 +353,7 @@ private:
DeclareFragment();
DeclareCompute();
DeclareRegisters();
+ DeclareCustomVariables();
DeclarePredicates();
DeclareLocalMemory();
DeclareSharedMemory();
@@ -586,6 +587,15 @@ private:
}
}
+ void DeclareCustomVariables() {
+ const u32 num_custom_variables = ir.GetNumCustomVariables();
+ for (u32 i = 0; i < num_custom_variables; ++i) {
+ const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
+ Name(id, fmt::format("custom_var_{}", i));
+ custom_variables.emplace(i, AddGlobalVariable(id));
+ }
+ }
+
void DeclarePredicates() {
for (const auto pred : ir.GetPredicates()) {
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
@@ -982,6 +992,11 @@ private:
return {OpLoad(t_float, registers.at(index)), Type::Float};
}
+ if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+ const u32 index = cv->GetIndex();
+ return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
+ }
+
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
return {Constant(t_uint, immediate->GetValue()), Type::Uint};
}
@@ -1333,6 +1348,9 @@ private:
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
+ } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
+ target = {custom_variables.at(cv->GetIndex()), Type::Float};
+
} else {
UNIMPLEMENTED();
}
@@ -2508,6 +2526,7 @@ private:
Id out_vertex{};
Id in_vertex{};
std::map<u32, Id> registers;
+ std::map<u32, Id> custom_variables;
std::map<Tegra::Shader::Pred, Id> predicates;
std::map<u32, Id> flow_variables;
Id local_memory{};
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index a2f0044ba..cca13bcde 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -65,8 +65,8 @@ public:
void DetachSegment(ASTNode start, ASTNode end);
void Remove(ASTNode node);
- ASTNode first{};
- ASTNode last{};
+ ASTNode first;
+ ASTNode last;
};
class ASTProgram {
@@ -299,9 +299,9 @@ private:
friend class ASTZipper;
ASTData data;
- ASTNode parent{};
- ASTNode next{};
- ASTNode previous{};
+ ASTNode parent;
+ ASTNode next;
+ ASTNode previous;
ASTZipper* manager{};
};
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
index a4a0319eb..0638be8cb 100644
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle
return value;
}
+std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
+ if (bound_buffer_saved) {
+ return bound_buffer;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ bound_buffer_saved = true;
+ bound_buffer = engine->GetBoundBuffer();
+ return bound_buffer;
+}
+
void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
keys.insert_or_assign({buffer, offset}, value);
}
@@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
}
+void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
+ bound_buffer_saved = true;
+ bound_buffer = buffer;
+}
+
bool ConstBufferLocker::IsConsistent() const {
if (!engine) {
return false;
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index d32e2d657..d3ea11087 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -10,6 +10,7 @@
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
namespace VideoCommon::Shader {
@@ -40,6 +41,8 @@ public:
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
+ std::optional<u32> ObtainBoundBuffer();
+
/// Inserts a key.
void InsertKey(u32 buffer, u32 offset, u32 value);
@@ -49,6 +52,9 @@ public:
/// Inserts a bindless sampler key.
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+ /// Set the bound buffer for this locker.
+ void SetBoundBuffer(u32 buffer);
+
/// Checks keys and samplers against engine's current const buffers. Returns true if they are
/// the same value, false otherwise;
bool IsConsistent() const;
@@ -71,12 +77,27 @@ public:
return bindless_samplers;
}
+ /// Gets bound buffer used on this shader
+ u32 GetBoundBuffer() const {
+ return bound_buffer;
+ }
+
+ /// Obtains access to the guest driver's profile.
+ VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
+ if (engine) {
+ return &engine->AccessGuestDriverProfile();
+ }
+ return nullptr;
+ }
+
private:
const Tegra::Engines::ShaderType stage;
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
BindlessSamplerMap bindless_samplers;
+ bool bound_buffer_saved{};
+ u32 bound_buffer{};
};
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 22c3e5120..6b697ed5d 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cstring>
+#include <limits>
#include <set>
#include <fmt/format.h>
@@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
+void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
+ const std::list<Sampler>& used_samplers) {
+ if (gpu_driver == nullptr) {
+ LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
+ return;
+ }
+ if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
+ return;
+ }
+ u32 count{};
+ std::vector<u32> bound_offsets;
+ for (const auto& sampler : used_samplers) {
+ if (sampler.IsBindless()) {
+ continue;
+ }
+ ++count;
+ bound_offsets.emplace_back(sampler.GetOffset());
+ }
+ if (count > 1) {
+ gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
+ }
+}
+
+std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
+ VideoCore::GuestDriverProfile* gpu_driver,
+ const std::list<Sampler>& used_samplers) {
+ if (gpu_driver == nullptr) {
+ LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
+ return std::nullopt;
+ }
+ const u32 base_offset = sampler_to_deduce.GetOffset();
+ u32 max_offset{std::numeric_limits<u32>::max()};
+ for (const auto& sampler : used_samplers) {
+ if (sampler.IsBindless()) {
+ continue;
+ }
+ if (sampler.GetOffset() > base_offset) {
+ max_offset = std::min(sampler.GetOffset(), max_offset);
+ }
+ }
+ if (max_offset == std::numeric_limits<u32>::max()) {
+ return std::nullopt;
+ }
+ return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
+}
+
} // Anonymous namespace
class ASTDecoder {
@@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
return pc + 1;
}
+void ShaderIR::PostDecode() {
+ // Deduce texture handler size if needed
+ auto gpu_driver = locker.AccessGuestDriverProfile();
+ DeduceTextureHandlerSize(gpu_driver, used_samplers);
+ // Deduce Indexed Samplers
+ if (!uses_indexed_samplers) {
+ return;
+ }
+ for (auto& sampler : used_samplers) {
+ if (!sampler.IsIndexed()) {
+ continue;
+ }
+ if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
+ sampler.SetSize(*size);
+ } else {
+ LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
+ sampler.SetSize(1);
+ }
+ }
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index fcedd2af6..90240c765 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -21,7 +21,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
Node op_a = GetRegister(instr.gpr8);
- Node op_b = [&]() -> Node {
+ Node op_b = [&] {
if (instr.is_b_imm) {
return GetImmediate19(instr);
} else if (instr.is_b_gpr) {
@@ -141,6 +141,15 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
+ case OpCode::Id::FCMP_R: {
+ UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
+ Node op_c = GetRegister(instr.gpr39);
+ Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
+ SetRegister(
+ bb, instr.gpr0,
+ Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
+ break;
+ }
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 371fae127..e60875cc4 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -297,7 +297,7 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod
const Node one = Immediate(1);
const Node two = Immediate(2);
- Node value{};
+ Node value;
for (u32 i = 0; i < lop_iterations; ++i) {
const Node shift_amount = Immediate(i);
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index 8be1119df..f992bbe2a 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -17,10 +17,13 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
+ const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
switch (opcode->get().GetId()) {
+ case OpCode::Id::BFI_RC:
+ return {GetRegister(instr.gpr39),
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
case OpCode::Id::BFI_IMM_R:
- return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())};
+ return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
default:
UNREACHABLE();
return {Immediate(0), Immediate(0)};
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 3da833e81..b5fbc4d58 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -24,6 +24,7 @@ using Tegra::Shader::GlobalAtomicType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
+using Tegra::Shader::StoreType;
namespace {
@@ -63,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
}
}
+Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
+ Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
+ offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
+ return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
+ Immediate(size));
+}
+
+Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
+ Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
+ offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
+ return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
+ std::move(offset), Immediate(size));
+}
+
+Node Sign16Extend(Node value) {
+ Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
+ Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
+ Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
+ return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
+}
+
} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -138,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
[[fallthrough]];
case OpCode::Id::LD_S: {
- const auto GetMemory = [&](s32 offset) {
+ const auto GetAddress = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
- const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
- immediate_offset);
- return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address)
- : GetLocalMemory(address);
+ return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
+ };
+ const auto GetMemory = [&](s32 offset) {
+ return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
+ : GetLocalMemory(GetAddress(offset));
};
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits32:
- case Tegra::Shader::StoreType::Bits64:
- case Tegra::Shader::StoreType::Bits128: {
- const u32 count = [&]() {
+ case StoreType::Signed16:
+ SetRegister(bb, instr.gpr0,
+ Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
+ break;
+ case StoreType::Bits32:
+ case StoreType::Bits64:
+ case StoreType::Bits128: {
+ const u32 count = [&] {
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits32:
+ case StoreType::Bits32:
return 1;
- case Tegra::Shader::StoreType::Bits64:
+ case StoreType::Bits64:
return 2;
- case Tegra::Shader::StoreType::Bits128:
+ case StoreType::Bits128:
return 4;
default:
UNREACHABLE();
@@ -214,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
// To handle unaligned loads get the bytes used to dereference global memory and extract
// those bytes from the loaded u32.
if (IsUnaligned(type)) {
- Node mask = Immediate(GetUnalignedMask(type));
- Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
-
- gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
- std::move(offset), Immediate(size));
+ gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
}
SetTemporary(bb, i, gmem);
@@ -271,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
};
- const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L
- ? &ShaderIR::SetLocalMemory
- : &ShaderIR::SetSharedMemory;
+ const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
+ const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
+ const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits128:
+ case StoreType::Bits128:
(this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
(this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
[[fallthrough]];
- case Tegra::Shader::StoreType::Bits64:
+ case StoreType::Bits64:
(this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
[[fallthrough]];
- case Tegra::Shader::StoreType::Bits32:
+ case StoreType::Bits32:
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
+ case StoreType::Signed16: {
+ Node address = GetAddress(0);
+ Node memory = (this->*get_memory)(address);
+ (this->*set_memory)(
+ bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
+ break;
+ }
default:
UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
static_cast<u32>(instr.ldst_sl.type.Value()));
@@ -325,12 +354,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Node value = GetRegister(instr.gpr0.Value() + i);
if (IsUnaligned(type)) {
- Node mask = Immediate(GetUnalignedMask(type));
- Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
-
- value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
- Immediate(size));
+ const u32 mask = GetUnalignedMask(type);
+ value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
}
bb.push_back(Operation(OperationCode::Assign, gmem, value));
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 7321698b2..4944e9d69 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -69,13 +69,16 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case OpCode::Id::MOV_SYS: {
const Node value = [this, instr] {
switch (instr.sys20) {
+ case SystemVariable::LaneId:
+ LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
+ return Immediate(0U);
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
- return Immediate(0u);
+ return Immediate(0U);
case SystemVariable::Tid: {
Node value = Immediate(0);
value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
@@ -188,7 +191,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
static_cast<u32>(cc));
- if (disable_flow_stack) {
+ if (decompiled) {
break;
}
@@ -200,7 +203,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
static_cast<u32>(cc));
- if (disable_flow_stack) {
+ if (decompiled) {
break;
}
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b567e39d..351c8c2f1 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
+ MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {},
+ {}, {}, component, element, {}};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const auto derivate_reg = instr.gpr20.Value();
const auto texture_type = instr.txd.texture_type.Value();
const auto coord_count = GetCoordCount(texture_type);
-
+ Node index_var{};
const Sampler* sampler =
- is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}})
+ is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}})
: GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
if (sampler == nullptr) {
@@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element};
+ MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
+ {}, {}, {}, element, index_var};
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
}
@@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
+ Node index_var{};
const Sampler* sampler =
- is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);
+ is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler);
if (sampler == nullptr) {
u32 indexer = 0;
@@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
- MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
+ Node index_var{};
const Sampler* sampler =
- is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);
+ is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler);
if (sampler == nullptr) {
u32 indexer = 0;
@@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
continue;
}
auto params = coords;
- MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporary(bb, indexer++, value);
}
@@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
// Otherwise create a new mapping for this sampler
const auto next_index = static_cast<u32>(used_samplers.size());
return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
- info.is_buffer);
+ info.is_buffer, false);
}
-const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
- const auto [base_sampler, buffer, offset] =
- TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT(base_sampler != nullptr);
- if (base_sampler == nullptr) {
+ const auto [base_node, tracked_sampler_info] =
+ TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
+ ASSERT(base_node != nullptr);
+ if (base_node == nullptr) {
return nullptr;
}
- const auto info = GetSamplerInfo(sampler_info, offset, buffer);
+ if (const auto bindless_sampler_info =
+ std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+ const u32 buffer = bindless_sampler_info->GetIndex();
+ const u32 offset = bindless_sampler_info->GetOffset();
+ const auto info = GetSamplerInfo(sampler_info, offset, buffer);
+
+ // If this sampler has already been used, return the existing mapping.
+ const auto it =
+ std::find_if(used_samplers.begin(), used_samplers.end(),
+ [buffer = buffer, offset = offset](const Sampler& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != used_samplers.end()) {
+ ASSERT(it->IsBindless() && it->GetType() == info.type &&
+ it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow);
+ return &*it;
+ }
- // If this sampler has already been used, return the existing mapping.
- const auto it =
- std::find_if(used_samplers.begin(), used_samplers.end(),
- [buffer = buffer, offset = offset](const Sampler& entry) {
- return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
- });
- if (it != used_samplers.end()) {
- ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
- it->IsShadow() == info.is_shadow);
- return &*it;
- }
+ // Otherwise create a new mapping for this sampler
+ const auto next_index = static_cast<u32>(used_samplers.size());
+ return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
+ info.is_shadow, info.is_buffer, false);
+ } else if (const auto array_sampler_info =
+ std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+ const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
+ index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
+ const auto info = GetSamplerInfo(sampler_info, base_offset);
+
+ // If this sampler has already been used, return the existing mapping.
+ const auto it = std::find_if(
+ used_samplers.begin(), used_samplers.end(),
+ [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; });
+ if (it != used_samplers.end()) {
+ ASSERT(!it->IsBindless() && it->GetType() == info.type &&
+ it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow &&
+ it->IsBuffer() == info.is_buffer && it->IsIndexed());
+ return &*it;
+ }
- // Otherwise create a new mapping for this sampler
- const auto next_index = static_cast<u32>(used_samplers.size());
- return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
- info.is_shadow, info.is_buffer);
+ uses_indexed_samplers = true;
+ // Otherwise create a new mapping for this sampler
+ const auto next_index = static_cast<u32>(used_samplers.size());
+ return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array,
+ info.is_shadow, info.is_buffer, true);
+ }
+ return nullptr;
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
"This method is not supported.");
const SamplerInfo info{texture_type, is_array, is_shadow, false};
- const Sampler* sampler =
- is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
+ Node index_var{};
+ const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info)
+ : GetSampler(instr.sampler, info);
Node4 values;
if (sampler == nullptr) {
for (u32 element = 0; element < values.size(); ++element) {
@@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
- MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
+ MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
+ lod, {}, element, index_var};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -596,7 +630,7 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
}
- Node dc{};
+ Node dc;
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
@@ -632,7 +666,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
const Node array = is_array ? GetRegister(array_register) : nullptr;
- Node dc{};
+ Node dc;
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
@@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
u64 parameter_register = instr.gpr20.Value();
const SamplerInfo info{texture_type, is_array, depth_compare, false};
- const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
+ Node index_var{};
+ const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info)
: GetSampler(instr.sampler, info);
Node4 values;
if (sampler == nullptr) {
@@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{
- *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
+ *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
+ index_var};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
@@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 9af1f0228..a0a7b9111 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -212,6 +212,7 @@ enum class MetaStackClass {
class OperationNode;
class ConditionalNode;
class GprNode;
+class CustomVarNode;
class ImmediateNode;
class InternalFlagNode;
class PredicateNode;
@@ -223,26 +224,32 @@ class SmemNode;
class GmemNode;
class CommentNode;
-using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
LmemNode, SmemNode, GmemNode, CommentNode>;
using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
+class BindlessSamplerNode;
+class ArraySamplerNode;
+
+using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSampler = std::shared_ptr<TrackSamplerData>;
+
class Sampler {
public:
/// This constructor is for bound samplers
constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_buffer)
+ bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_buffer{is_buffer} {}
+ is_buffer{is_buffer}, is_indexed{is_indexed} {}
/// This constructor is for bindless samplers
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_buffer)
+ bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
- is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {}
+ is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
constexpr u32 GetIndex() const {
return index;
@@ -276,16 +283,72 @@ public:
return is_bindless;
}
+ constexpr bool IsIndexed() const {
+ return is_indexed;
+ }
+
+ constexpr u32 Size() const {
+ return size;
+ }
+
+ constexpr void SetSize(u32 new_size) {
+ size = new_size;
+ }
+
private:
u32 index{}; ///< Emulated index given for the this sampler.
u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+ u32 size{}; ///< Size of the sampler if indexed.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler.
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
+ bool is_indexed{}; ///< Whether this sampler is an indexed array of textures.
+};
+
+/// Represents a tracked bindless sampler into a direct const buffer
+class ArraySamplerNode final {
+public:
+ explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
+ : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
+
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetBaseOffset() const {
+ return base_offset;
+ }
+
+ constexpr u32 GetIndexVar() const {
+ return bindless_var;
+ }
+
+private:
+ u32 index;
+ u32 base_offset;
+ u32 bindless_var;
+};
+
+/// Represents a tracked bindless sampler into a direct const buffer
+class BindlessSamplerNode final {
+public:
+ explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
+
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
+ return offset;
+ }
+
+private:
+ u32 index;
+ u32 offset;
};
class Image final {
@@ -380,8 +443,9 @@ struct MetaTexture {
std::vector<Node> derivates;
Node bias;
Node lod;
- Node component{};
+ Node component;
u32 element{};
+ Node index;
};
struct MetaImage {
@@ -488,6 +552,19 @@ private:
Tegra::Shader::Register index{};
};
+/// A custom variable
+class CustomVarNode final {
+public:
+ explicit constexpr CustomVarNode(u32 index) : index{index} {}
+
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+private:
+ u32 index{};
+};
+
/// A 32-bits value that represents an immediate value
class ImmediateNode final {
public:
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 0c2aa749b..11231bbea 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) {
return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
}
+template <typename T, typename... Args>
+TrackSampler MakeTrackSampler(Args&&... args) {
+ static_assert(std::is_convertible_v<T, TrackSamplerData>);
+ return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+}
+
template <typename... Args>
Node Operation(OperationCode code, Args&&... args) {
if constexpr (sizeof...(args) == 0) {
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 31eecb3f4..3a5d280a9 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
Decode();
+ PostDecode();
}
ShaderIR::~ShaderIR() = default;
@@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) {
return MakeNode<GprNode>(reg);
}
+Node ShaderIR::GetCustomVariable(u32 id) {
+ return MakeNode<CustomVarNode>(id);
+}
+
Node ShaderIR::GetImmediate19(Instruction instr) {
return Immediate(instr.alu.GetImm20_19());
}
@@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) {
return id;
}
+u32 ShaderIR::NewCustomVariable() {
+ return num_custom_variables++;
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ba1db4c11..b0851c3be 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -180,6 +180,10 @@ public:
return amend_code[index];
}
+ u32 GetNumCustomVariables() const {
+ return num_custom_variables;
+ }
+
private:
friend class ASTDecoder;
@@ -191,6 +195,7 @@ private:
};
void Decode();
+ void PostDecode();
NodeBlock DecodeRange(u32 begin, u32 end);
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
@@ -235,6 +240,8 @@ private:
/// Generates a node for a passed register.
Node GetRegister(Tegra::Shader::Register reg);
+ /// Generates a node for a custom variable
+ Node GetCustomVariable(u32 id);
/// Generates a node representing a 19-bit immediate value
Node GetImmediate19(Tegra::Shader::Instruction instr);
/// Generates a node representing a 32-bit immediate value
@@ -321,7 +328,7 @@ private:
std::optional<SamplerInfo> sampler_info = std::nullopt);
/// Accesses a texture sampler for a bindless texture.
- const Sampler* GetBindlessSampler(Tegra::Shader::Register reg,
+ const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
std::optional<SamplerInfo> sampler_info = std::nullopt);
/// Accesses an image.
@@ -387,6 +394,9 @@ private:
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
+ std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+ s64 cursor);
+
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
@@ -399,6 +409,8 @@ private:
/// Register new amending code and obtain the reference id.
std::size_t DeclareAmend(Node new_amend);
+ u32 NewCustomVariable();
+
const ProgramCode& program_code;
const u32 main_offset;
const CompilerSettings settings;
@@ -414,6 +426,7 @@ private:
NodeBlock global_code;
ASTManager program_manager{true, true};
std::vector<Node> amend_code;
+ u32 num_custom_variables{};
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
@@ -431,6 +444,7 @@ private:
bool uses_instance_id{};
bool uses_vertex_id{};
bool uses_warps{};
+ bool uses_indexed_samplers{};
Tegra::Shader::Header header;
};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 165c79330..face8c943 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -8,6 +8,7 @@
#include "common/common_types.h"
#include "video_core/shader/node.h"
+#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
}
return {};
}
+
+std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
+ if (operation.GetCode() != OperationCode::UAdd) {
+ return std::nullopt;
+ }
+ Node gpr;
+ Node offset;
+ ASSERT(operation.GetOperandsCount() == 2);
+ for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
+ Node operand = operation[i];
+ if (std::holds_alternative<ImmediateNode>(*operand)) {
+ offset = operation[i];
+ } else if (std::holds_alternative<GprNode>(*operand)) {
+ gpr = operation[i];
+ }
+ }
+ if (offset && gpr) {
+ return std::make_pair(gpr, offset);
+ }
+ return std::nullopt;
+}
+
+bool AmendNodeCv(std::size_t amend_index, Node node) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
+ operation->SetAmendIndex(amend_index);
+ return true;
+ } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ conditional->SetAmendIndex(amend_index);
+ return true;
+ }
+ return false;
+}
+
} // Anonymous namespace
+std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+ s64 cursor) {
+ if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+ // Constant buffer found, test if it's an immediate
+ const auto offset = cbuf->GetOffset();
+ if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
+ auto track =
+ MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
+ return {tracked, track};
+ } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
+ auto bound_buffer = locker.ObtainBoundBuffer();
+ if (!bound_buffer) {
+ return {};
+ }
+ if (*bound_buffer != cbuf->GetIndex()) {
+ return {};
+ }
+ auto pair = DecoupleIndirectRead(*operation);
+ if (!pair) {
+ return {};
+ }
+ auto [gpr, base_offset] = *pair;
+ const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
+ auto gpu_driver = locker.AccessGuestDriverProfile();
+ if (gpu_driver == nullptr) {
+ return {};
+ }
+ const u32 bindless_cv = NewCustomVariable();
+ const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
+ Immediate(gpu_driver->GetTextureHandlerSize()));
+
+ const Node cv_node = GetCustomVariable(bindless_cv);
+ Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
+ const std::size_t amend_index = DeclareAmend(amend_op);
+ AmendNodeCv(amend_index, code[cursor]);
+ // TODO Implement Bindless Index custom variable
+ auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
+ offset_inm->GetValue(), bindless_cv);
+ return {tracked, track};
+ }
+ return {};
+ }
+ if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
+ if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
+ return {};
+ }
+ // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
+ // register that it uses as operand
+ const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
+ if (!source) {
+ return {};
+ }
+ return TrackBindlessSampler(source, code, new_cursor);
+ }
+ if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
+ for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
+ if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
+ std::get<0>(found)) {
+ // Cbuf found in operand.
+ return found;
+ }
+ }
+ return {};
+ }
+ if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
+ const auto& conditional_code = conditional->GetCode();
+ return TrackBindlessSampler(tracked, conditional_code,
+ static_cast<s64>(conditional_code.size()));
+ }
+ return {};
+}
+
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 8e947394c..a5f81a8a0 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,19 +3,32 @@
// Refer to the license.txt file included.
#include <memory>
+#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#ifdef HAS_VULKAN
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#endif
#include "video_core/video_core.h"
namespace VideoCore {
std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
Core::System& system) {
- return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
+ switch (Settings::values.renderer_backend) {
+ case Settings::RendererBackend::OpenGL:
+ return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
+#ifdef HAS_VULKAN
+ case Settings::RendererBackend::Vulkan:
+ return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
+#endif
+ default:
+ return nullptr;
+ }
}
std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {