about | summary | refs | log | tree | commit | diff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/buffer_cache/buffer_block.h | 1
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 12
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 3
-rw-r--r-- src/video_core/engines/fermi_2d.h | 3
-rw-r--r-- src/video_core/engines/kepler_memory.cpp | 2
-rw-r--r-- src/video_core/engines/kepler_memory.h | 1
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 22
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 2
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 48
-rw-r--r-- src/video_core/engines/maxwell_dma.h | 9
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 9
-rw-r--r-- src/video_core/gpu.cpp | 16
-rw-r--r-- src/video_core/gpu.h | 11
-rw-r--r-- src/video_core/gpu_asynch.cpp | 5
-rw-r--r-- src/video_core/gpu_asynch.h | 5
-rw-r--r-- src/video_core/gpu_synch.cpp | 5
-rw-r--r-- src/video_core/gpu_synch.h | 5
-rw-r--r-- src/video_core/gpu_thread.cpp | 8
-rw-r--r-- src/video_core/gpu_thread.h | 3
-rw-r--r-- src/video_core/morton.cpp | 116
-rw-r--r-- src/video_core/morton.h | 3
-rw-r--r-- src/video_core/rasterizer_interface.h | 2
-rw-r--r-- src/video_core/renderer_base.h | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp | 43
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h | 6
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 10
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 932
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h | 2
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp | 89
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h | 5
-rw-r--r-- src/video_core/shader/decode/conversion.cpp | 22
-rw-r--r-- src/video_core/shader/decode/float_set.cpp | 1
-rw-r--r-- src/video_core/shader/decode/float_set_predicate.cpp | 10
-rw-r--r-- src/video_core/shader/decode/integer_set.cpp | 1
-rw-r--r-- src/video_core/shader/decode/integer_set_predicate.cpp | 1
-rw-r--r-- src/video_core/shader/decode/predicate_set_register.cpp | 1
-rw-r--r-- src/video_core/surface.cpp | 5
-rw-r--r-- src/video_core/texture_cache/surface_params.h | 1
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 2
-rw-r--r-- src/video_core/textures/decoders.cpp | 14
-rw-r--r-- src/video_core/textures/decoders.h | 3
-rw-r--r-- src/video_core/textures/texture.h | 2
46 files changed, 722 insertions, 730 deletions
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index d2124443f..4b9193182 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -69,7 +69,6 @@ protected:
private:
CacheAddr cache_addr{};
CacheAddr cache_addr_end{};
- u64 pages{};
std::size_t size{};
u64 epoch{};
};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 38ce16ed5..2442ddfd6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -18,10 +18,7 @@
#include "video_core/buffer_cache/buffer_block.h"
#include "video_core/buffer_cache/map_interval.h"
#include "video_core/memory_manager.h"
-
-namespace VideoCore {
-class RasterizerInterface;
-}
+#include "video_core/rasterizer_interface.h"
namespace VideoCommon {
@@ -348,7 +345,6 @@ private:
const CacheAddr cache_addr_end = cache_addr + size - 1;
u64 page_start = cache_addr >> block_page_bits;
const u64 page_end = cache_addr_end >> block_page_bits;
- const u64 num_pages = page_end - page_start + 1;
while (page_start <= page_end) {
auto it = blocks.find(page_start);
if (it == blocks.end()) {
@@ -417,7 +413,10 @@ private:
return false;
}
+ VideoCore::RasterizerInterface& rasterizer;
+ Core::System& system;
std::unique_ptr<StreamBuffer> stream_buffer;
+
TBufferType stream_buffer_handle{};
bool invalidated = false;
@@ -441,8 +440,7 @@ private:
std::list<TBuffer> pending_destruction{};
u64 epoch{};
u64 modified_ticks{};
- VideoCore::RasterizerInterface& rasterizer;
- Core::System& system;
+
std::recursive_mutex mutex;
};
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 0ee228e28..98a8b5337 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,8 +10,7 @@
namespace Tegra::Engines {
-Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
- : rasterizer{rasterizer}, memory_manager{memory_manager} {}
+Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 05421d185..0901cf2fa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -33,7 +33,7 @@ namespace Tegra::Engines {
class Fermi2D final {
public:
- explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+ explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
~Fermi2D() = default;
/// Write the value to the register identified by method.
@@ -145,7 +145,6 @@ public:
private:
VideoCore::RasterizerInterface& rasterizer;
- MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 44279de00..fa4a7c5c1 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -15,7 +15,7 @@
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
- : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
+ : system{system}, upload_state{memory_manager, regs.upload} {}
KeplerMemory::~KeplerMemory() = default;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f3bc675a9..e0e25c321 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -65,7 +65,6 @@ public:
private:
Core::System& system;
- MemoryManager& memory_manager;
Upload::State upload_state;
};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 125c53360..f5158d219 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -249,16 +249,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
executing_macro = 0;
// Lookup the macro offset
- const u32 entry{(method - MacroRegistersStart) >> 1};
- const auto& search{macro_offsets.find(entry)};
- if (search == macro_offsets.end()) {
- LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
- UNREACHABLE();
- return;
- }
+ const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
// Execute the current macro.
- macro_interpreter.Execute(search->second, std::move(parameters));
+ macro_interpreter.Execute(macro_positions[entry], std::move(parameters));
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
@@ -421,7 +415,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) {
}
void Maxwell3D::ProcessMacroBind(u32 data) {
- macro_offsets[regs.macros.entry] = data;
+ macro_positions[regs.macros.entry++] = data;
}
void Maxwell3D::ProcessQueryGet() {
@@ -524,7 +518,7 @@ void Maxwell3D::ProcessQueryCondition() {
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
const u32 increment = regs.sync_info.increment.Value();
- const u32 cache_flush = regs.sync_info.unknown.Value();
+ [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
if (increment) {
system.GPU().IncrementSyncPoint(sync_point);
}
@@ -626,10 +620,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
- const auto r_type{tic_entry.r_type.Value()};
- const auto g_type{tic_entry.g_type.Value()};
- const auto b_type{tic_entry.b_type.Value()};
- const auto a_type{tic_entry.a_type.Value()};
+ [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()};
+ [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()};
+ [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()};
+ [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()};
// TODO(Subv): Different data types for separate components are not supported
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1ee982b76..0184342a0 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1270,7 +1270,7 @@ private:
MemoryManager& memory_manager;
/// Start offsets of each macro in macro_memory
- std::unordered_map<u32, u32> macro_offsets;
+ std::array<u32, 0x80> macro_positions = {};
/// Memory for macro code
MacroMemory macro_memory;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a28c04473..ad8453c5f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,18 +5,17 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
+#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
-MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
+MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
+ : system{system}, memory_manager{memory_manager} {}
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -84,13 +83,17 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.enable_2d == 1);
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
- ASSERT(regs.src_params.size_z == 1);
+ ASSERT(regs.src_params.BlockDepth() == 0);
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
- const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+ const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
const std::size_t src_size = Texture::CalculateSize(
- true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+ true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
+ const std::size_t src_layer_size = Texture::CalculateSize(
+ true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
+ regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
+
const std::size_t dst_size = regs.dst_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
@@ -104,23 +107,23 @@ void MaxwellDMA::HandleCopy() {
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
- regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
- write_buffer.data(), regs.src_params.BlockHeight(),
- regs.src_params.pos_x, regs.src_params.pos_y);
+ Texture::UnswizzleSubrect(
+ regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
+ read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
+ regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
} else {
ASSERT(regs.dst_params.BlockDepth() == 0);
- const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
+ const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
const std::size_t dst_size = Texture::CalculateSize(
- true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+ true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t dst_layer_size = Texture::CalculateSize(
- true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
+ true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t src_size = regs.src_pitch * regs.y_count;
@@ -133,14 +136,19 @@ void MaxwellDMA::HandleCopy() {
write_buffer.resize(dst_size);
}
- memory_manager.ReadBlock(source, read_buffer.data(), src_size);
- memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+ if (Settings::values.use_accurate_gpu_emulation) {
+ memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+ } else {
+ memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
+ memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
+ }
// If the input is linear and the output is tiled, swizzle the input and copy it over.
- Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
- src_bytes_per_pixel,
- write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
- read_buffer.data(), regs.dst_params.BlockHeight());
+ Texture::SwizzleSubrect(
+ regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel,
+ write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(),
+ regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 17b015ca7..93808a9bb 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -20,10 +20,6 @@ namespace Tegra {
class MemoryManager;
}
-namespace VideoCore {
-class RasterizerInterface;
-}
-
namespace Tegra::Engines {
/**
@@ -33,8 +29,7 @@ namespace Tegra::Engines {
class MaxwellDMA final {
public:
- explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
+ explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
~MaxwellDMA() = default;
/// Write the value to the register identified by method.
@@ -180,8 +175,6 @@ public:
private:
Core::System& system;
- VideoCore::RasterizerInterface& rasterizer;
-
MemoryManager& memory_manager;
std::vector<u8> read_buffer;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index bc8c2a1c5..c3678b9ea 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -886,6 +886,7 @@ union Instruction {
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
+ BitField<6, 1, u64> neg_b;
BitField<7, 1, u64> abs_a;
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
@@ -1019,7 +1020,6 @@ union Instruction {
} iset;
union {
- BitField<41, 2, u64> selector; // i2i and i2f only
BitField<45, 1, u64> negate_a;
BitField<49, 1, u64> abs_a;
BitField<10, 2, Register::Size> src_size;
@@ -1045,6 +1045,13 @@ union Instruction {
}
} f2f;
+ union {
+ BitField<41, 2, u64> selector;
+ } int_src;
+
+ union {
+ BitField<41, 1, u64> selector;
+ } float_src;
} conversion;
union {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index c409af194..2c47541cb 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -17,27 +17,15 @@
namespace Tegra {
-u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
- switch (format) {
- case PixelFormat::ABGR8:
- case PixelFormat::BGRA8:
- return 4;
- default:
- return 4;
- }
-
- UNREACHABLE();
-}
-
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
- fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
+ fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
- maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
+ maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 0baf2177c..78bc0601a 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -95,14 +95,10 @@ class DebugContext;
struct FramebufferConfig {
enum class PixelFormat : u32 {
ABGR8 = 1,
+ RGB565 = 4,
BGRA8 = 5,
};
- /**
- * Returns the number of bytes per pixel.
- */
- static u32 BytesPerPixel(PixelFormat format);
-
VAddr address;
u32 offset;
u32 width;
@@ -253,8 +249,7 @@ public:
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
/// Swap buffers (render frame)
- virtual void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+ virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
@@ -285,8 +280,8 @@ private:
protected:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
- VideoCore::RendererBase& renderer;
Core::System& system;
+ VideoCore::RendererBase& renderer;
private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index ea67be831..f2a3a390e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -23,9 +23,8 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
-void GPUAsynch::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- gpu_thread.SwapBuffers(std::move(framebuffer));
+void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ gpu_thread.SwapBuffers(framebuffer);
}
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 36377d677..a12f9bac4 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -14,15 +14,14 @@ class RendererBase;
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
-class GPUAsynch : public Tegra::GPU {
+class GPUAsynch final : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUAsynch() override;
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index d4ead9c47..d48221077 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -19,9 +19,8 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->DispatchCalls();
}
-void GPUSynch::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- renderer.SwapBuffers(std::move(framebuffer));
+void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ renderer.SwapBuffers(framebuffer);
}
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 07bcc47f1..5eb1c461c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -13,15 +13,14 @@ class RendererBase;
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
-class GPUSynch : public Tegra::GPU {
+class GPUSynch final : public Tegra::GPU {
public:
explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUSynch() override;
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index b441e92b0..5f039e4fd 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -39,7 +39,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
- renderer.SwapBuffers(std::move(data->framebuffer));
+ renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -78,9 +78,9 @@ void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
}
-void ThreadManager::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- PushCommand(SwapBuffersCommand(std::move(framebuffer)));
+void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ PushCommand(SwapBuffersCommand(framebuffer ? *framebuffer
+ : std::optional<const Tegra::FramebufferConfig>{}));
}
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 1d9d0c39e..3ae0ec9f3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -110,8 +110,7 @@ public:
void SubmitList(Tegra::CommandList&& entries);
/// Swap buffers (render frame)
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(CacheAddr addr, u64 size);
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 3e91cbc83..084f85e67 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -25,8 +25,8 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
- const u32 tile_size_x{GetDefaultBlockWidth(format)};
- const u32 tile_size_y{GetDefaultBlockHeight(format)};
+ constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
+ constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
if constexpr (morton_to_linear) {
Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
@@ -186,99 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
return morton_to_linear_fns[static_cast<std::size_t>(format)];
}
-static u32 MortonInterleave128(u32 x, u32 y) {
- // 128x128 Z-Order coordinate from 2D coordinates
- static constexpr u32 xlut[] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
- 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
- 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
- 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
- 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
- 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
- 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
- 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
- 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
- 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
- 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
- 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
- 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
- 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
- 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
- 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
- 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
- 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
- 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
- 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
- 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
- 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
- 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
- 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
- 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
- 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
- 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
- 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
- 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
- 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
- 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
- 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
- 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
- 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
- };
- static constexpr u32 ylut[] = {
- 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
- 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
- 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
- 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
- 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
- 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
- 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
- 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
- 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
- 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
- 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
- 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
- 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
- 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
- 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
- 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
- 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
- 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
- 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
- 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
- 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
- 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
- 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
- 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
- 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
- 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
- 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
- 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
- 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
- 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
- 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
- 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
- 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
- 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
- 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
- };
- return xlut[x % 128] + ylut[y % 128];
-}
-
-static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
- // Calculates the offset of the position of the pixel in Morton order
- // Framebuffer images are split into 128x128 tiles.
-
- constexpr u32 block_height = 128;
- const u32 coarse_x = x & ~127;
-
- const u32 i = MortonInterleave128(x, y);
-
- const u32 offset = coarse_x * block_height;
-
- return (i + offset) * bytes_per_pixel;
-}
-
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, u8* addr) {
@@ -286,23 +193,4 @@ void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stri
tile_width_spacing, buffer, addr);
}
-void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
- u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
- const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
- u8* data_ptrs[2];
- for (u32 y = 0; y < height; ++y) {
- for (u32 x = 0; x < width; ++x) {
- const u32 coarse_y = y & ~127;
- const u32 morton_offset =
- GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
- const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
-
- data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
- data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
-
- std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
- }
- }
-}
-
} // namespace VideoCore
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index ee5b45555..b714a7e3f 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -15,7 +15,4 @@ void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat forma
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, u8* addr);
-void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
- u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
-
} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6e44d51cf..6b3f2d50a 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -50,7 +50,7 @@ public:
/// and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
- // Notify the rasterizer to send all written commands to the host GPU.
+ /// Notify the rasterizer to send all written commands to the host GPU.
virtual void FlushCommands() = 0;
/// Notify rasterizer that a frame is about to finish
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d54c3723..af1bebc4f 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -36,8 +36,7 @@ public:
virtual ~RendererBase();
/// Swap buffers (render frame)
- virtual void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+ virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
/// Initialize the renderer
virtual bool Init() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0781e6595..f8a807c84 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,6 +8,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 03d434b28..4f59a87b4 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -14,12 +14,22 @@
namespace OpenGL {
namespace {
+
template <typename T>
T GetInteger(GLenum pname) {
GLint temporary;
glGetIntegerv(pname, &temporary);
return static_cast<T>(temporary);
}
+
+bool TestProgram(const GLchar* glsl) { // True when `glsl` builds and links as a vertex program.
+ const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)};
+ GLint link_status{}; // Zero-init: a failed query leaves it untouched, so it must not hold garbage.
+ glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
+ glDeleteProgram(shader);
+ return link_status == GL_TRUE;
+}
+
} // Anonymous namespace
Device::Device() {
@@ -32,6 +42,11 @@ Device::Device() {
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = TestComponentIndexingBug();
+ has_precise_bug = TestPreciseBug();
+
+ LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
+ LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
+ LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
}
Device::Device(std::nullptr_t) {
@@ -42,30 +57,21 @@ Device::Device(std::nullptr_t) {
has_vertex_viewport_layer = true;
has_variable_aoffi = true;
has_component_indexing_bug = false;
+ has_precise_bug = false;
}
bool Device::TestVariableAoffi() {
- const GLchar* AOFFI_TEST = R"(#version 430 core
+ return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
out vec4 output_attribute;
void main() {
output_attribute = textureOffset(tex, vec2(0), variable_offset);
-}
-)";
- const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)};
- GLint link_status{};
- glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
- glDeleteProgram(shader);
-
- const bool supported{link_status == GL_TRUE};
- LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported);
- return supported;
+})");
}
bool Device::TestComponentIndexingBug() {
- constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
const GLchar* COMPONENT_TEST = R"(#version 430 core
layout (std430, binding = 0) buffer OutputBuffer {
uint output_value;
@@ -105,12 +111,21 @@ void main() {
GLuint result;
glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
if (result != values.at(index)) {
- LOG_INFO(Render_OpenGL, log_message, true);
return true;
}
}
- LOG_INFO(Render_OpenGL, log_message, false);
return false;
}
+bool Device::TestPreciseBug() {
+ return !TestProgram(R"(#version 430 core
+in vec3 coords;
+out float out_value;
+uniform sampler2DShadow tex;
+void main() {
+ precise float tmp_value = vec4(texture(tex, coords)).x;
+ out_value = tmp_value;
+})");
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 3ef7c6dd8..ba6dcd3be 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -46,9 +46,14 @@ public:
return has_component_indexing_bug;
}
+ bool HasPreciseBug() const {
+ return has_precise_bug;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestComponentIndexingBug();
+ static bool TestPreciseBug();
std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{};
@@ -58,6 +63,7 @@ private:
bool has_vertex_viewport_layer{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
+ bool has_precise_bug{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 019583718..bb09ecd52 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -708,8 +708,6 @@ void RasterizerOpenGL::DrawArrays() {
return;
}
- const auto& regs = gpu.regs;
-
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a32a7e984..cf6a5cddf 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -249,20 +249,24 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
if (!texture_buffer_usage.test(i)) {
continue;
}
- source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
+ source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
+ }
+ if (texture_buffer_usage.any()) {
+ source += '\n';
}
if (program_type == ProgramType::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
GetPrimitiveDescription(primitive_mode);
- source += "layout (" + std::string(glsl_topology) + ") in;\n";
+ source += "layout (" + std::string(glsl_topology) + ") in;\n\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
}
if (program_type == ProgramType::Compute) {
source += "layout (local_size_variable) in;\n";
}
+ source += '\n';
source += code;
OGLShader shader;
@@ -291,7 +295,7 @@ std::set<GLenum> GetSupportedFormats() {
CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
GLShader::ProgramResult result)
- : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr},
+ : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, program_type{program_type},
disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a3106a0ff..2c8faf855 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -106,7 +106,6 @@ private:
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
- u8* host_ptr{};
VAddr cpu_addr{};
u64 unique_identifier{};
ProgramType program_type{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1bfdbcd61..a5cc1a86f 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -39,7 +39,7 @@ using namespace VideoCommon::Shader;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&;
-enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
struct TextureAoffi {};
using TextureArgument = std::pair<Type, Node>;
@@ -48,7 +48,7 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
-class ShaderWriter {
+class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
DEBUG_ASSERT(scope >= 0);
@@ -93,9 +93,157 @@ private:
u32 temporary_index = 1;
};
+class Expression final { // GLSL expression text paired with the Type it evaluates to.
+public:
+ Expression(std::string code, Type type) : code{std::move(code)}, type{type} {
+ ASSERT(type != Type::Void); // An expression built from code must have a concrete type.
+ }
+ Expression() : type{Type::Void} {} // Void expression: empty code, no value.
+
+ Type GetType() const {
+ return type;
+ }
+
+ std::string GetCode() const {
+ return code;
+ }
+
+ void CheckVoid() const {
+ ASSERT(type == Type::Void);
+ }
+
+ std::string As(Type type) const { // `type` here is the requested type; it shadows the member.
+ switch (type) {
+ case Type::Bool:
+ return AsBool();
+ case Type::Bool2:
+ return AsBool2();
+ case Type::Float:
+ return AsFloat();
+ case Type::Int:
+ return AsInt();
+ case Type::Uint:
+ return AsUint();
+ case Type::HalfFloat:
+ return AsHalfFloat();
+ default:
+ UNREACHABLE_MSG("Invalid type");
+ return code;
+ }
+ }
+
+ std::string AsBool() const { // Bool has no conversions; any other stored type is an error.
+ switch (type) {
+ case Type::Bool:
+ return code;
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsBool2() const { // Bool2 has no conversions; any other stored type is an error.
+ switch (type) {
+ case Type::Bool2:
+ return code;
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsFloat() const { // Returns the code wrapped so that it reads as a float.
+ switch (type) {
+ case Type::Float:
+ return code;
+ case Type::Uint:
+ return fmt::format("utof({})", code);
+ case Type::Int:
+ return fmt::format("itof({})", code);
+ case Type::HalfFloat:
+ return fmt::format("utof(packHalf2x16({}))", code);
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsInt() const { // Returns the code wrapped so that it reads as an int.
+ switch (type) {
+ case Type::Float:
+ return fmt::format("ftoi({})", code);
+ case Type::Uint:
+ return fmt::format("int({})", code);
+ case Type::Int:
+ return code;
+ case Type::HalfFloat:
+ return fmt::format("int(packHalf2x16({}))", code);
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsUint() const { // Returns the code wrapped so that it reads as a uint.
+ switch (type) {
+ case Type::Float:
+ return fmt::format("ftou({})", code);
+ case Type::Uint:
+ return code;
+ case Type::Int:
+ return fmt::format("uint({})", code);
+ case Type::HalfFloat:
+ return fmt::format("packHalf2x16({})", code);
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsHalfFloat() const { // Returns the code wrapped so that it reads as a vec2 half pair.
+ switch (type) {
+ case Type::Float:
+ return fmt::format("unpackHalf2x16(ftou({}))", code);
+ case Type::Uint:
+ return fmt::format("unpackHalf2x16({})", code);
+ case Type::Int:
+ return fmt::format("unpackHalf2x16(uint({}))", code); // unpackHalf2x16 takes a uint.
+ case Type::HalfFloat:
+ return code;
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+private:
+ std::string code;
+ Type type{};
+};
+
+constexpr const char* GetTypeString(Type type) {
+ switch (type) {
+ case Type::Bool:
+ return "bool";
+ case Type::Bool2:
+ return "bvec2";
+ case Type::Float:
+ return "float";
+ case Type::Int:
+ return "int";
+ case Type::Uint:
+ return "uint";
+ case Type::HalfFloat:
+ return "vec2";
+ default:
+ UNREACHABLE_MSG("Invalid type");
+ return "<invalid type>";
+ }
+}
+
/// Generates code to use for a swizzle operation.
constexpr const char* GetSwizzle(u32 element) {
- constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"};
+ constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
return swizzle.at(element);
}
@@ -134,8 +282,8 @@ constexpr bool IsGenericAttribute(Attribute::Index index) {
return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
}
-constexpr Attribute::Index ToGenericAttribute(u32 value) {
- return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0));
+constexpr Attribute::Index ToGenericAttribute(u64 value) {
+ return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
}
u32 GetGenericAttributeIndex(Attribute::Index index) {
@@ -191,7 +339,7 @@ public:
// VM's program counter
const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = {}u;", first_address);
+ code.AddLine("uint jmp_to = {}U;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
@@ -199,7 +347,7 @@ public:
constexpr u32 FLOW_STACK_SIZE = 20;
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
- code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+ code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
}
}
@@ -210,7 +358,7 @@ public:
for (const auto& pair : ir.GetBasicBlocks()) {
const auto [address, bb] = pair;
- code.AddLine("case 0x{:x}u: {{", address);
+ code.AddLine("case 0x{:X}U: {{", address);
++code.scope;
VisitBlock(bb);
@@ -322,7 +470,7 @@ private:
void DeclareRegisters() {
const auto& registers = ir.GetRegisters();
for (const u32 gpr : registers) {
- code.AddLine("float {} = 0;", GetRegister(gpr));
+ code.AddLine("float {} = 0.0f;", GetRegister(gpr));
}
if (!registers.empty()) {
code.AddNewLine();
@@ -348,7 +496,7 @@ private:
return;
}
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
+ code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@@ -371,8 +519,6 @@ private:
return "noperspective ";
default:
case AttributeUse::Unused:
- UNREACHABLE_MSG("Unused attribute being fetched");
- return {};
UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
return {};
}
@@ -449,7 +595,7 @@ private:
const auto [index, size] = entry;
code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index,
GetConstBufferBlock(index));
- code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index));
+ code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
code.AddLine("}};");
code.AddNewLine();
}
@@ -470,7 +616,7 @@ private:
code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{",
base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base));
- code.AddLine(" float {}[];", GetGlobalMemory(base));
+ code.AddLine(" uint {}[];", GetGlobalMemory(base));
code.AddLine("}};");
code.AddNewLine();
}
@@ -528,7 +674,7 @@ private:
if (!ir.HasPhysicalAttributes()) {
return;
}
- code.AddLine("float readPhysicalAttribute(uint physical_address) {{");
+ code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
++code.scope;
code.AddLine("switch (physical_address) {{");
@@ -537,15 +683,16 @@ private:
for (u32 index = 0; index < num_attributes; ++index) {
const auto attribute{ToGenericAttribute(index)};
for (u32 element = 0; element < 4; ++element) {
- constexpr u32 generic_base{0x80};
- constexpr u32 generic_stride{16};
- constexpr u32 element_stride{4};
+ constexpr u32 generic_base = 0x80;
+ constexpr u32 generic_stride = 16;
+ constexpr u32 element_stride = 4;
const u32 address{generic_base + index * generic_stride + element * element_stride};
- const bool declared{stage != ProgramType::Fragment ||
- header.ps.GetAttributeUse(index) != AttributeUse::Unused};
- const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
- code.AddLine("case 0x{:x}: return {};", address, value);
+ const bool declared = stage != ProgramType::Fragment ||
+ header.ps.GetAttributeUse(index) != AttributeUse::Unused;
+ const std::string value =
+ declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
+ code.AddLine("case 0x{:X}U: return {};", address, value);
}
}
@@ -565,7 +712,7 @@ private:
case Tegra::Shader::ImageType::Texture1D:
return "image1D";
case Tegra::Shader::ImageType::TextureBuffer:
- return "bufferImage";
+ return "imageBuffer";
case Tegra::Shader::ImageType::Texture1DArray:
return "image1DArray";
case Tegra::Shader::ImageType::Texture2D:
@@ -590,13 +737,11 @@ private:
void VisitBlock(const NodeBlock& bb) {
for (const auto& node : bb) {
- if (const std::string expr = Visit(node); !expr.empty()) {
- code.AddLine(expr);
- }
+ Visit(node).CheckVoid();
}
}
- std::string Visit(const Node& node) {
+ Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
@@ -614,18 +759,18 @@ private:
if (const auto gpr = std::get_if<GprNode>(&*node)) {
const u32 index = gpr->GetIndex();
if (index == Register::ZeroIndex) {
- return "0";
+ return {"0U", Type::Uint};
}
- return GetRegister(index);
+ return {GetRegister(index), Type::Float};
}
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
// For eyecandy avoid using hex numbers on single digits
- return fmt::format("utof({}u)", immediate->GetValue());
+ return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
}
- return fmt::format("utof(0x{:x}u)", immediate->GetValue());
+ return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
}
if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
@@ -640,17 +785,18 @@ private:
}
}();
if (predicate->IsNegated()) {
- return fmt::format("!({})", value);
+ return {fmt::format("!({})", value), Type::Bool};
}
- return value;
+ return {value, Type::Bool};
}
if (const auto abuf = std::get_if<AbufNode>(&*node)) {
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
- return fmt::format("readPhysicalAttribute(ftou({}))",
- Visit(abuf->GetPhysicalAddress()));
+ return {fmt::format("ReadPhysicalAttribute({})",
+ Visit(abuf->GetPhysicalAddress()).AsUint()),
+ Type::Float};
}
return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
}
@@ -661,59 +807,64 @@ private:
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4);
+ return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4),
+ Type::Uint};
}
if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
+ code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
if (!device.HasComponentIndexingBug()) {
- return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset);
+ return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+ final_offset, final_offset),
+ Type::Uint};
}
// AMD's proprietary GLSL compiler emits ill code for variable component access.
// To bypass this driver bug generate 4 ifs, one per each component.
const std::string pack = code.GenerateTemporary();
- code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+ code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
final_offset);
const std::string result = code.GenerateTemporary();
- code.AddLine("float {};", result);
+ code.AddLine("uint {};", result);
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
pack, GetSwizzle(swizzle));
}
- return result;
+ return {result, Type::Uint};
}
UNREACHABLE_MSG("Unmanaged offset node type");
}
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- const std::string real = Visit(gmem->GetRealAddress());
- const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
- return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+ const std::string real = Visit(gmem->GetRealAddress()).AsUint();
+ const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
+ const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
+ return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
+ Type::Uint};
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
- return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
+ return {
+ fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
+ Type::Uint};
}
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- return GetInternalFlag(internal_flag->GetFlag());
+ return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
// It's invalid to call conditional on nested nodes, use an operation instead
- code.AddLine("if ({}) {{", Visit(conditional->GetCondition()));
+ code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
++code.scope;
VisitBlock(conditional->GetCode());
@@ -724,20 +875,21 @@ private:
}
if (const auto comment = std::get_if<CommentNode>(&*node)) {
- return "// " + comment->GetText();
+ code.AddLine("// " + comment->GetText());
+ return {};
}
UNREACHABLE();
return {};
}
- std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
+ Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
const auto GeometryPass = [&](std::string_view name) {
if (stage == ProgramType::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
- return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer));
+ return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
}
return std::string(name);
};
@@ -746,25 +898,27 @@ private:
case Attribute::Index::Position:
switch (stage) {
case ProgramType::Geometry:
- return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
- GetSwizzle(element));
+ return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
+ GetSwizzle(element)),
+ Type::Float};
case ProgramType::Fragment:
- return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
+ return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
+ Type::Float};
default:
UNREACHABLE();
}
case Attribute::Index::PointCoord:
switch (element) {
case 0:
- return "gl_PointCoord.x";
+ return {"gl_PointCoord.x", Type::Float};
case 1:
- return "gl_PointCoord.y";
+ return {"gl_PointCoord.y", Type::Float};
case 2:
case 3:
- return "0";
+ return {"0.0f", Type::Float};
}
UNREACHABLE();
- return "0";
+ return {"0", Type::Int};
case Attribute::Index::TessCoordInstanceIDVertexID:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
@@ -773,44 +927,49 @@ private:
switch (element) {
case 2:
// Config pack's first value is instance_id.
- return "uintBitsToFloat(config_pack[0])";
+ return {"config_pack[0]", Type::Uint};
case 3:
- return "uintBitsToFloat(gl_VertexID)";
+ return {"gl_VertexID", Type::Int};
}
UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return "0";
+ return {"0", Type::Int};
case Attribute::Index::FrontFacing:
// TODO(Subv): Find out what the values are for the other elements.
ASSERT(stage == ProgramType::Fragment);
switch (element) {
case 3:
- return "itof(gl_FrontFacing ? -1 : 0)";
+ return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
}
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return "0";
+ return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
- return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
+ return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
+ Type::Float};
}
break;
}
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
- return "0";
+ return {"0", Type::Int};
}
- std::string ApplyPrecise(Operation operation, const std::string& value) {
+ Expression ApplyPrecise(Operation operation, std::string value, Type type) {
if (!IsPrecise(operation)) {
- return value;
+ return {std::move(value), type};
}
- // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
- const std::string precise = stage != ProgramType::Fragment ? "precise " : "";
+ // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
+ // be found in fragment shaders, so we disable precise there. There are vertex shaders that
+ // also fail to build but nobody seems to care about those.
+ // Note: Only bugged drivers will skip precise.
+ const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment;
- const std::string temporary = code.GenerateTemporary();
- code.AddLine("{}float {} = {};", precise, temporary, value);
- return temporary;
+ std::string temporary = code.GenerateTemporary();
+ code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
+ temporary, value);
+ return {std::move(temporary), type};
}
- std::string VisitOperand(Operation operation, std::size_t operand_index) {
+ Expression VisitOperand(Operation operation, std::size_t operand_index) {
const auto& operand = operation[operand_index];
const bool parent_precise = IsPrecise(operation);
const bool child_precise = IsPrecise(operand);
@@ -819,19 +978,16 @@ private:
return Visit(operand);
}
- const std::string temporary = code.GenerateTemporary();
- code.AddLine("float {} = {};", temporary, Visit(operand));
- return temporary;
- }
-
- std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
- return CastOperand(VisitOperand(operation, operand_index), type);
+ Expression value = Visit(operand);
+ std::string temporary = code.GenerateTemporary();
+ code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
+ return {std::move(temporary), value.GetType()};
}
- std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) {
+ Expression GetOutputAttribute(const AbufNode* abuf) {
switch (const auto attribute = abuf->GetIndex()) {
case Attribute::Index::Position:
- return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false);
+ return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float};
case Attribute::Index::LayerViewportPointSize:
switch (abuf->GetElement()) {
case 0:
@@ -841,119 +997,79 @@ private:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return std::make_pair("gl_Layer", true);
+ return {"gl_Layer", Type::Int};
case 2:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return std::make_pair("gl_ViewportIndex", true);
+ return {"gl_ViewportIndex", Type::Int};
case 3:
UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
- return std::make_pair("gl_PointSize", false);
+ return {"gl_PointSize", Type::Float};
}
return {};
case Attribute::Index::ClipDistances0123:
- return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false);
+ return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float};
case Attribute::Index::ClipDistances4567:
- return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4),
- false);
+ return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float};
default:
if (IsGenericAttribute(attribute)) {
- return std::make_pair(
- GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false);
+ return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()),
+ Type::Float};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
}
}
- std::string CastOperand(const std::string& value, Type type) const {
- switch (type) {
- case Type::Bool:
- case Type::Bool2:
- case Type::Float:
- return value;
- case Type::Int:
- return fmt::format("ftoi({})", value);
- case Type::Uint:
- return fmt::format("ftou({})", value);
- case Type::HalfFloat:
- return fmt::format("toHalf2({})", value);
- }
- UNREACHABLE();
- return value;
+ Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
+ Type type_a) {
+ std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string BitwiseCastResult(const std::string& value, Type type,
- bool needs_parenthesis = false) {
- switch (type) {
- case Type::Bool:
- case Type::Bool2:
- case Type::Float:
- if (needs_parenthesis) {
- return fmt::format("({})", value);
- }
- return value;
- case Type::Int:
- return fmt::format("itof({})", value);
- case Type::Uint:
- return fmt::format("utof({})", value);
- case Type::HalfFloat:
- return fmt::format("fromHalf2({})", value);
- }
- UNREACHABLE();
- return value;
- }
-
- std::string GenerateUnary(Operation operation, const std::string& func, Type result_type,
- Type type_a, bool needs_parenthesis = true) {
- const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a));
-
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis));
- }
-
- std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
+ Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
+ Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateTernary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_c = VisitOperand(operation, 2, type_c);
- const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
+ Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b, Type type_c) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ const std::string op_c = VisitOperand(operation, 2).As(type_c);
+ std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c, Type type_d) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_c = VisitOperand(operation, 2, type_c);
- const std::string op_d = VisitOperand(operation, 3, type_d);
- const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
+ Expression GenerateQuaternary(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b, Type type_c, Type type_d) { // Emits func(a, b, c, d).
+ const std::string op_a = VisitOperand(operation, 0).As(type_a); // Each operand is converted
+ const std::string op_b = VisitOperand(operation, 1).As(type_b); // to the type requested for
+ const std::string op_c = VisitOperand(operation, 2).As(type_c); // its position before being
+ const std::string op_d = VisitOperand(operation, 3).As(type_d); // spliced into the call.
+ std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
const std::vector<TextureIR>& extras) {
- constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
+ constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -970,17 +1086,17 @@ private:
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += Visit(operation[i]);
+ expr += Visit(operation[i]).AsFloat();
const std::size_t next = i + 1;
if (next < count)
expr += ", ";
}
if (has_array) {
- expr += ", float(ftoi(" + Visit(meta->array) + "))";
+ expr += ", float(" + Visit(meta->array).AsInt() + ')';
}
if (has_shadow) {
- expr += ", " + Visit(meta->depth_compare);
+ expr += ", " + Visit(meta->depth_compare).AsFloat();
}
expr += ')';
@@ -1011,11 +1127,11 @@ private:
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
- expr += fmt::format("ftoi({})", Visit(operand));
+ expr += Visit(operand).AsInt();
}
break;
case Type::Float:
- expr += Visit(operand);
+ expr += Visit(operand).AsFloat();
break;
default: {
const auto type_int = static_cast<u32>(type);
@@ -1031,7 +1147,7 @@ private:
if (aoffi.empty()) {
return {};
}
- constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
+ constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
std::string expr = ", ";
expr += coord_constructors.at(aoffi.size() - 1);
expr += '(';
@@ -1044,7 +1160,7 @@ private:
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
- expr += fmt::format("ftoi({})", Visit(operand));
+ expr += Visit(operand).AsInt();
} else {
// Insert 0 on devices not supporting variable AOFFI.
expr += '0';
@@ -1058,328 +1174,314 @@ private:
return expr;
}
- std::string Assign(Operation operation) {
+ Expression Assign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
- std::string target;
- bool is_integer = false;
-
+ Expression target;
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
// Writing to Register::ZeroIndex is a no op
return {};
}
- target = GetRegister(gpr->GetIndex());
+ target = {GetRegister(gpr->GetIndex()), Type::Float};
} else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- const auto result = GetOutputAttribute(abuf);
- if (!result) {
- return {};
- }
- target = result->first;
- is_integer = result->second;
+ target = GetOutputAttribute(abuf);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
- target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
+ target = {
+ fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
+ Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- const std::string real = Visit(gmem->GetRealAddress());
- const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
- target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+ const std::string real = Visit(gmem->GetRealAddress()).AsUint();
+ const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
+ const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
+ target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
+ Type::Uint};
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
- if (is_integer) {
- code.AddLine("{} = ftoi({});", target, Visit(src));
- } else {
- code.AddLine("{} = {};", target, Visit(src));
- }
+ code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
return {};
}
template <Type type>
- std::string Add(Operation operation) {
+ Expression Add(Operation operation) {
return GenerateBinaryInfix(operation, "+", type, type, type);
}
template <Type type>
- std::string Mul(Operation operation) {
+ Expression Mul(Operation operation) {
return GenerateBinaryInfix(operation, "*", type, type, type);
}
template <Type type>
- std::string Div(Operation operation) {
+ Expression Div(Operation operation) {
return GenerateBinaryInfix(operation, "/", type, type, type);
}
template <Type type>
- std::string Fma(Operation operation) {
+ Expression Fma(Operation operation) {
return GenerateTernary(operation, "fma", type, type, type, type);
}
template <Type type>
- std::string Negate(Operation operation) {
- return GenerateUnary(operation, "-", type, type, true);
+ Expression Negate(Operation operation) {
+ return GenerateUnary(operation, "-", type, type);
}
template <Type type>
- std::string Absolute(Operation operation) {
- return GenerateUnary(operation, "abs", type, type, false);
+ Expression Absolute(Operation operation) {
+ return GenerateUnary(operation, "abs", type, type);
}
- std::string FClamp(Operation operation) {
+ Expression FClamp(Operation operation) {
return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
Type::Float);
}
- std::string FCastHalf0(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
- return fmt::format("({})[0]", op_a);
+ Expression FCastHalf0(Operation operation) {
+ return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
- std::string FCastHalf1(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
- return fmt::format("({})[1]", op_a);
+ Expression FCastHalf1(Operation operation) {
+ return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
template <Type type>
- std::string Min(Operation operation) {
+ Expression Min(Operation operation) {
return GenerateBinaryCall(operation, "min", type, type, type);
}
template <Type type>
- std::string Max(Operation operation) {
+ Expression Max(Operation operation) {
return GenerateBinaryCall(operation, "max", type, type, type);
}
- std::string Select(Operation operation) {
- const std::string condition = Visit(operation[0]);
- const std::string true_case = Visit(operation[1]);
- const std::string false_case = Visit(operation[2]);
- const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
+ Expression Select(Operation operation) {
+ const std::string condition = Visit(operation[0]).AsBool();
+ const std::string true_case = Visit(operation[1]).AsUint();
+ const std::string false_case = Visit(operation[2]).AsUint();
+ std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
- return ApplyPrecise(operation, op_str);
+ return ApplyPrecise(operation, std::move(op_str), Type::Uint);
}
- std::string FCos(Operation operation) {
- return GenerateUnary(operation, "cos", Type::Float, Type::Float, false);
+ Expression FCos(Operation operation) {
+ return GenerateUnary(operation, "cos", Type::Float, Type::Float);
}
- std::string FSin(Operation operation) {
- return GenerateUnary(operation, "sin", Type::Float, Type::Float, false);
+ Expression FSin(Operation operation) {
+ return GenerateUnary(operation, "sin", Type::Float, Type::Float);
}
- std::string FExp2(Operation operation) {
- return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false);
+ Expression FExp2(Operation operation) {
+ return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
}
- std::string FLog2(Operation operation) {
- return GenerateUnary(operation, "log2", Type::Float, Type::Float, false);
+ Expression FLog2(Operation operation) {
+ return GenerateUnary(operation, "log2", Type::Float, Type::Float);
}
- std::string FInverseSqrt(Operation operation) {
- return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false);
+ Expression FInverseSqrt(Operation operation) {
+ return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
}
- std::string FSqrt(Operation operation) {
- return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false);
+ Expression FSqrt(Operation operation) {
+ return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
}
- std::string FRoundEven(Operation operation) {
- return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false);
+ Expression FRoundEven(Operation operation) {
+ return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
}
- std::string FFloor(Operation operation) {
- return GenerateUnary(operation, "floor", Type::Float, Type::Float, false);
+ Expression FFloor(Operation operation) {
+ return GenerateUnary(operation, "floor", Type::Float, Type::Float);
}
- std::string FCeil(Operation operation) {
- return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false);
+ Expression FCeil(Operation operation) {
+ return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
}
- std::string FTrunc(Operation operation) {
- return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false);
+ Expression FTrunc(Operation operation) {
+ return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
}
template <Type type>
- std::string FCastInteger(Operation operation) {
- return GenerateUnary(operation, "float", Type::Float, type, false);
+ Expression FCastInteger(Operation operation) {
+ return GenerateUnary(operation, "float", Type::Float, type);
}
- std::string ICastFloat(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Float, false);
+ Expression ICastFloat(Operation operation) {
+ return GenerateUnary(operation, "int", Type::Int, Type::Float);
}
- std::string ICastUnsigned(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Uint, false);
+ Expression ICastUnsigned(Operation operation) {
+ return GenerateUnary(operation, "int", Type::Int, Type::Uint);
}
template <Type type>
- std::string LogicalShiftLeft(Operation operation) {
+ Expression LogicalShiftLeft(Operation operation) {
return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
}
- std::string ILogicalShiftRight(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Uint);
- const std::string op_b = VisitOperand(operation, 1, Type::Uint);
- const std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
+ Expression ILogicalShiftRight(Operation operation) {
+ const std::string op_a = VisitOperand(operation, 0).AsUint();
+ const std::string op_b = VisitOperand(operation, 1).AsUint();
+ std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int));
+ return ApplyPrecise(operation, std::move(op_str), Type::Int);
}
- std::string IArithmeticShiftRight(Operation operation) {
+ Expression IArithmeticShiftRight(Operation operation) {
return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
}
template <Type type>
- std::string BitwiseAnd(Operation operation) {
+ Expression BitwiseAnd(Operation operation) {
return GenerateBinaryInfix(operation, "&", type, type, type);
}
template <Type type>
- std::string BitwiseOr(Operation operation) {
+ Expression BitwiseOr(Operation operation) {
return GenerateBinaryInfix(operation, "|", type, type, type);
}
template <Type type>
- std::string BitwiseXor(Operation operation) {
+ Expression BitwiseXor(Operation operation) {
return GenerateBinaryInfix(operation, "^", type, type, type);
}
template <Type type>
- std::string BitwiseNot(Operation operation) {
- return GenerateUnary(operation, "~", type, type, false);
+ Expression BitwiseNot(Operation operation) {
+ return GenerateUnary(operation, "~", type, type);
}
- std::string UCastFloat(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false);
+ Expression UCastFloat(Operation operation) {
+ return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
}
- std::string UCastSigned(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false);
+ Expression UCastSigned(Operation operation) {
+ return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
}
- std::string UShiftRight(Operation operation) {
+ Expression UShiftRight(Operation operation) {
return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
}
template <Type type>
- std::string BitfieldInsert(Operation operation) {
+ Expression BitfieldInsert(Operation operation) {
return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
Type::Int);
}
template <Type type>
- std::string BitfieldExtract(Operation operation) {
+ Expression BitfieldExtract(Operation operation) {
return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
}
template <Type type>
- std::string BitCount(Operation operation) {
- return GenerateUnary(operation, "bitCount", type, type, false);
+ Expression BitCount(Operation operation) {
+ return GenerateUnary(operation, "bitCount", type, type);
}
- std::string HNegate(Operation operation) {
+ Expression HNegate(Operation operation) {
const auto GetNegate = [&](std::size_t index) {
- return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1";
+ return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
};
- const std::string value =
- fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat),
- GetNegate(1), GetNegate(2));
- return BitwiseCastResult(value, Type::HalfFloat);
- }
-
- std::string HClamp(Operation operation) {
- const std::string value = VisitOperand(operation, 0, Type::HalfFloat);
- const std::string min = VisitOperand(operation, 1, Type::Float);
- const std::string max = VisitOperand(operation, 2, Type::Float);
- const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
-
- return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
- }
-
- std::string HCastFloat(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Float);
- return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a);
- }
-
- std::string HUnpack(Operation operation) {
- const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
- const auto value = [&]() -> std::string {
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32:
- return fmt::format("vec2(fromHalf2({}))", operand);
- case Tegra::Shader::HalfType::H0_H0:
- return fmt::format("vec2({}[0])", operand);
- case Tegra::Shader::HalfType::H1_H1:
- return fmt::format("vec2({}[1])", operand);
- }
- UNREACHABLE();
- return "0";
- }();
- return fmt::format("fromHalf2({})", value);
+ return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
+ GetNegate(1), GetNegate(2)),
+ Type::HalfFloat};
+ }
+
+ Expression HClamp(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsHalfFloat();
+ const std::string min = VisitOperand(operation, 1).AsFloat();
+ const std::string max = VisitOperand(operation, 2).AsFloat();
+ std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
+
+ return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
+ }
+
+ Expression HCastFloat(Operation operation) {
+ return {fmt::format("vec2({})", VisitOperand(operation, 0).AsFloat()), Type::HalfFloat};
+ }
+
+ Expression HUnpack(Operation operation) {
+ Expression operand = VisitOperand(operation, 0);
+ switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
+ case Tegra::Shader::HalfType::H0_H1:
+ return operand;
+ case Tegra::Shader::HalfType::F32:
+ return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
+ case Tegra::Shader::HalfType::H0_H0:
+ return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
+ case Tegra::Shader::HalfType::H1_H1:
+ return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
+ }
}
- std::string HMergeF32(Operation operation) {
- return fmt::format("float(toHalf2({})[0])", Visit(operation[0]));
+ Expression HMergeF32(Operation operation) {
+ return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
- std::string HMergeH0(Operation operation) {
- return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]),
- Visit(operation[0]));
+ Expression HMergeH0(Operation operation) {
+ std::string dest = VisitOperand(operation, 0).AsUint();
+ std::string src = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint};
}
- std::string HMergeH1(Operation operation) {
- return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]),
- Visit(operation[1]));
+ Expression HMergeH1(Operation operation) {
+ std::string dest = VisitOperand(operation, 0).AsUint();
+ std::string src = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint};
}
- std::string HPack2(Operation operation) {
- return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]),
- Visit(operation[1]));
+ Expression HPack2(Operation operation) {
+ return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::HalfFloat};
}
template <Type type>
- std::string LogicalLessThan(Operation operation) {
+ Expression LogicalLessThan(Operation operation) {
return GenerateBinaryInfix(operation, "<", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalEqual(Operation operation) {
+ Expression LogicalEqual(Operation operation) {
return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalLessEqual(Operation operation) {
+ Expression LogicalLessEqual(Operation operation) {
return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalGreaterThan(Operation operation) {
+ Expression LogicalGreaterThan(Operation operation) {
return GenerateBinaryInfix(operation, ">", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalNotEqual(Operation operation) {
+ Expression LogicalNotEqual(Operation operation) {
return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalGreaterEqual(Operation operation) {
+ Expression LogicalGreaterEqual(Operation operation) {
return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
}
- std::string LogicalFIsNan(Operation operation) {
- return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false);
+ Expression LogicalFIsNan(Operation operation) {
+ return GenerateUnary(operation, "isnan", Type::Bool, Type::Float);
}
- std::string LogicalAssign(Operation operation) {
+ Expression LogicalAssign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -1400,78 +1502,80 @@ private:
target = GetInternalFlag(flag->GetFlag());
}
- code.AddLine("{} = {};", target, Visit(src));
+ code.AddLine("{} = {};", target, Visit(src).AsBool());
return {};
}
- std::string LogicalAnd(Operation operation) {
+ Expression LogicalAnd(Operation operation) {
return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalOr(Operation operation) {
+ Expression LogicalOr(Operation operation) {
return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalXor(Operation operation) {
+ Expression LogicalXor(Operation operation) {
return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalNegate(Operation operation) {
- return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false);
+ Expression LogicalNegate(Operation operation) {
+ return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
}
- std::string LogicalPick2(Operation operation) {
- const std::string pair = VisitOperand(operation, 0, Type::Bool2);
- return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
+ Expression LogicalPick2(Operation operation) {
+ return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
+ VisitOperand(operation, 1).AsUint()),
+ Type::Bool};
}
- std::string LogicalAnd2(Operation operation) {
+ Expression LogicalAnd2(Operation operation) {
return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
}
template <bool with_nan>
- std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
- const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat)};
+ Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
+ Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
+ Type::HalfFloat, Type::HalfFloat);
if constexpr (!with_nan) {
return comparison;
}
- return fmt::format("halfFloatNanComparison({}, {}, {})", comparison,
- VisitOperand(operation, 0, Type::HalfFloat),
- VisitOperand(operation, 1, Type::HalfFloat));
+ return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
+ VisitOperand(operation, 0).AsHalfFloat(),
+ VisitOperand(operation, 1).AsHalfFloat()),
+ Type::Bool2};
}
template <bool with_nan>
- std::string Logical2HLessThan(Operation operation) {
+ Expression Logical2HLessThan(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "lessThan");
}
template <bool with_nan>
- std::string Logical2HEqual(Operation operation) {
+ Expression Logical2HEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "equal");
}
template <bool with_nan>
- std::string Logical2HLessEqual(Operation operation) {
+ Expression Logical2HLessEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
}
template <bool with_nan>
- std::string Logical2HGreaterThan(Operation operation) {
+ Expression Logical2HGreaterThan(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "greaterThan");
}
template <bool with_nan>
- std::string Logical2HNotEqual(Operation operation) {
+ Expression Logical2HNotEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "notEqual");
}
template <bool with_nan>
- std::string Logical2HGreaterEqual(Operation operation) {
+ Expression Logical2HGreaterEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
}
- std::string Texture(Operation operation) {
+ Expression Texture(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1480,10 +1584,10 @@ private:
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
- return expr + GetSwizzle(meta->element);
+ return {expr + GetSwizzle(meta->element), Type::Float};
}
- std::string TextureLod(Operation operation) {
+ Expression TextureLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1492,54 +1596,54 @@ private:
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
- return expr + GetSwizzle(meta->element);
+ return {expr + GetSwizzle(meta->element), Type::Float};
}
- std::string TextureGather(Operation operation) {
+ Expression TextureGather(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- return GenerateTexture(operation, "Gather",
- {TextureArgument{type, meta->component}, TextureAoffi{}}) +
- GetSwizzle(meta->element);
+ return {GenerateTexture(operation, "Gather",
+ {TextureArgument{type, meta->component}, TextureAoffi{}}) +
+ GetSwizzle(meta->element),
+ Type::Float};
}
- std::string TextureQueryDimensions(Operation operation) {
+ Expression TextureQueryDimensions(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const std::string sampler = GetSampler(meta->sampler);
- const std::string lod = VisitOperand(operation, 0, Type::Int);
+ const std::string lod = VisitOperand(operation, 0).AsInt();
switch (meta->element) {
case 0:
case 1:
- return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod,
- GetSwizzle(meta->element));
- case 2:
- return "0";
+ return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
+ Type::Int};
case 3:
- return fmt::format("itof(textureQueryLevels({}))", sampler);
+ return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
}
UNREACHABLE();
- return "0";
+ return {"0", Type::Int};
}
- std::string TextureQueryLod(Operation operation) {
+ Expression TextureQueryLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
if (meta->element < 2) {
- return fmt::format("itof(int(({} * vec2(256)){}))",
- GenerateTexture(operation, "QueryLod", {}),
- GetSwizzle(meta->element));
+ return {fmt::format("int(({} * vec2(256)){})",
+ GenerateTexture(operation, "QueryLod", {}),
+ GetSwizzle(meta->element)),
+ Type::Int};
}
- return "0";
+ return {"0", Type::Int};
}
- std::string TexelFetch(Operation operation) {
- constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
+ Expression TexelFetch(Operation operation) {
+ constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
UNIMPLEMENTED_IF(meta->sampler.IsArray());
@@ -1552,7 +1656,7 @@ private:
expr += constructors.at(operation.GetOperandsCount() - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += VisitOperand(operation, i, Type::Int);
+ expr += VisitOperand(operation, i).AsInt();
const std::size_t next = i + 1;
if (next == count)
expr += ')';
@@ -1565,7 +1669,7 @@ private:
if (meta->lod) {
expr += ", ";
- expr += CastOperand(Visit(meta->lod), Type::Int);
+ expr += Visit(meta->lod).AsInt();
}
expr += ')';
expr += GetSwizzle(meta->element);
@@ -1580,11 +1684,11 @@ private:
code.AddLine("float {} = {};", tmp, expr);
code.AddLine("#endif");
- return tmp;
+ return {tmp, Type::Float};
}
- std::string ImageStore(Operation operation) {
- constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
+ Expression ImageStore(Operation operation) {
+ constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const auto meta{std::get<MetaImage>(operation.GetMeta())};
std::string expr = "imageStore(";
@@ -1594,7 +1698,7 @@ private:
const std::size_t coords_count{operation.GetOperandsCount()};
expr += constructors.at(coords_count - 1);
for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i, Type::Int);
+ expr += VisitOperand(operation, i).AsInt();
if (i + 1 < coords_count) {
expr += ", ";
}
@@ -1605,7 +1709,7 @@ private:
UNIMPLEMENTED_IF(values_count != 4);
expr += "vec4(";
for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i));
+ expr += Visit(meta.values.at(i)).AsFloat();
if (i + 1 < values_count) {
expr += ", ";
}
@@ -1616,52 +1720,52 @@ private:
return {};
}
- std::string Branch(Operation operation) {
+ Expression Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine("jmp_to = 0x{:x}u;", target->GetValue());
+ code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
code.AddLine("break;");
return {};
}
- std::string BranchIndirect(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Uint);
+ Expression BranchIndirect(Operation operation) {
+ const std::string op_a = VisitOperand(operation, 0).AsUint();
code.AddLine("jmp_to = {};", op_a);
code.AddLine("break;");
return {};
}
- std::string PushFlowStack(Operation operation) {
+ Expression PushFlowStack(Operation operation) {
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack),
+ code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
target->GetValue());
return {};
}
- std::string PopFlowStack(Operation operation) {
+ Expression PopFlowStack(Operation operation) {
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
code.AddLine("break;");
return {};
}
- std::string Exit(Operation operation) {
+ Expression Exit(Operation operation) {
if (stage != ProgramType::Fragment) {
code.AddLine("return;");
return {};
}
const auto& used_registers = ir.GetRegisters();
- const auto SafeGetRegister = [&](u32 reg) -> std::string {
+ const auto SafeGetRegister = [&](u32 reg) -> Expression {
// TODO(Rodrigo): Replace with contains once C++20 releases
if (used_registers.find(reg) != used_registers.end()) {
- return GetRegister(reg);
+ return {GetRegister(reg), Type::Float};
}
- return "0.0f";
+ return {"0.0f", Type::Float};
};
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
@@ -1674,7 +1778,7 @@ private:
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
code.AddLine("FragColor{}[{}] = {};", render_target, component,
- SafeGetRegister(current_reg));
+ SafeGetRegister(current_reg).AsFloat());
++current_reg;
}
}
@@ -1683,14 +1787,14 @@ private:
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
- code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1));
+ code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
}
code.AddLine("return;");
return {};
}
- std::string Discard(Operation operation) {
+ Expression Discard(Operation operation) {
// Enclose "discard" in a conditional, so that GLSL compilation does not complain
// about unexecuted instructions that may follow this.
code.AddLine("if (true) {{");
@@ -1701,7 +1805,7 @@ private:
return {};
}
- std::string EmitVertex(Operation operation) {
+ Expression EmitVertex(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");
@@ -1712,7 +1816,7 @@ private:
return {};
}
- std::string EndPrimitive(Operation operation) {
+ Expression EndPrimitive(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");
@@ -1720,59 +1824,59 @@ private:
return {};
}
- std::string YNegate(Operation operation) {
+ Expression YNegate(Operation operation) {
// Config pack's third value is Y_NEGATE's state.
- return "uintBitsToFloat(config_pack[2])";
+ return {"config_pack[2]", Type::Uint};
}
template <u32 element>
- std::string LocalInvocationId(Operation) {
- return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')';
+ Expression LocalInvocationId(Operation) {
+ return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
}
template <u32 element>
- std::string WorkGroupId(Operation) {
- return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
+ Expression WorkGroupId(Operation) {
+ return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
}
- std::string BallotThread(Operation operation) {
- const std::string value = VisitOperand(operation, 0, Type::Bool);
+ Expression BallotThread(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
LOG_ERROR(Render_OpenGL,
"Nvidia warp intrinsics are not available and its required by a shader");
// Stub on non-Nvidia devices by simulating all threads voting the same as the active
// one.
- return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value);
+ return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
}
- return fmt::format("utof(ballotThreadNV({}))", value);
+ return {fmt::format("ballotThreadNV({})", value), Type::Uint};
}
- std::string Vote(Operation operation, const char* func) {
- const std::string value = VisitOperand(operation, 0, Type::Bool);
+ Expression Vote(Operation operation, const char* func) {
+ const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
LOG_ERROR(Render_OpenGL,
"Nvidia vote intrinsics are not available and its required by a shader");
// Stub with a warp size of one.
- return value;
+ return {value, Type::Bool};
}
- return fmt::format("{}({})", func, value);
+ return {fmt::format("{}({})", func, value), Type::Bool};
}
- std::string VoteAll(Operation operation) {
+ Expression VoteAll(Operation operation) {
return Vote(operation, "allThreadsNV");
}
- std::string VoteAny(Operation operation) {
+ Expression VoteAny(Operation operation) {
return Vote(operation, "anyThreadNV");
}
- std::string VoteEqual(Operation operation) {
+ Expression VoteEqual(Operation operation) {
if (!device.HasWarpIntrinsics()) {
LOG_ERROR(Render_OpenGL,
"Nvidia vote intrinsics are not available and its required by a shader");
// We must return true here since a stub for a theoretical warp size of 1 will always
// return an equal result for all its votes.
- return "true";
+ return {"true", Type::Bool};
}
return Vote(operation, "allThreadsEqualNV");
}
@@ -1973,8 +2077,8 @@ private:
}
std::string GetInternalFlag(InternalFlag flag) const {
- constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag",
- "carry_flag", "overflow_flag"};
+ constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
+ "overflow_flag"};
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
@@ -2022,24 +2126,16 @@ private:
std::string GetCommonDeclarations() {
return fmt::format(
- "#define MAX_CONSTBUFFER_ELEMENTS {}\n"
"#define ftoi floatBitsToInt\n"
"#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n"
"#define utof uintBitsToFloat\n\n"
- "float fromHalf2(vec2 pair) {{\n"
- " return utof(packHalf2x16(pair));\n"
- "}}\n\n"
- "vec2 toHalf2(float value) {{\n"
- " return unpackHalf2x16(ftou(value));\n"
- "}}\n\n"
- "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
+ "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
" bvec2 is_nan1 = isnan(pair1);\n"
" bvec2 is_nan2 = isnan(pair2);\n"
" return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
"is_nan2.y);\n"
- "}}\n",
- MAX_CONSTBUFFER_ELEMENTS);
+ "}}\n\n");
}
ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 408332f90..4f135fe03 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -184,6 +184,9 @@ GLint GetSwizzleSource(SwizzleSource source) {
}
void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
+ if (params.IsBuffer()) {
+ return;
+ }
glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
@@ -208,6 +211,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
nullptr, GL_DYNAMIC_STORAGE_BIT);
glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
+ break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index ff6ab6988..21324488a 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -51,7 +51,7 @@ public:
}
protected:
- void DecorateSurfaceName();
+ void DecorateSurfaceName() override;
View CreateView(const ViewParams& view_key) override;
View CreateViewInner(const ViewParams& view_key, bool is_proxy);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a05cef3b9..af9684839 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -101,9 +101,7 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
RendererOpenGL::~RendererOpenGL() = default;
-void RendererOpenGL::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-
+void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
system.GetPerfStats().EndSystemFrame();
// Maintain the rasterizer's state as a priority
@@ -113,9 +111,9 @@ void RendererOpenGL::SwapBuffers(
if (framebuffer) {
// If framebuffer is provided, reload it from memory to a texture
- if (screen_info.texture.width != (GLsizei)framebuffer->get().width ||
- screen_info.texture.height != (GLsizei)framebuffer->get().height ||
- screen_info.texture.pixel_format != framebuffer->get().pixel_format) {
+ if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
+ screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
+ screen_info.texture.pixel_format != framebuffer->pixel_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
@@ -149,43 +147,43 @@ void RendererOpenGL::SwapBuffers(
* Loads framebuffer from emulated memory into the active OpenGL texture.
*/
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
- const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
- const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
-
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
framebuffer_crop_rect = framebuffer.crop_rect;
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default
- // only allows rows to have a memory alignement of 4.
- ASSERT(framebuffer.stride % 4 == 0);
-
- if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
- // Reset the screen info's display texture to its own permanent texture
- screen_info.display_texture = screen_info.texture.resource.handle;
-
- rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
-
- constexpr u32 linear_bpp = 4;
- VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
- framebuffer.width, framebuffer.height, bytes_per_pixel,
- linear_bpp, Memory::GetPointer(framebuffer_addr),
- gl_framebuffer_data.data());
-
- glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
+ const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
+ if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+ return;
+ }
- // Update existing texture
- // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
- // they differ from the LCD resolution.
- // TODO: Applications could theoretically crash yuzu here by specifying too large
- // framebuffer sizes. We should make sure that this cannot happen.
- glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
- framebuffer.height, screen_info.texture.gl_format,
- screen_info.texture.gl_type, gl_framebuffer_data.data());
+ // Reset the screen info's display texture to its own permanent texture
+ screen_info.display_texture = screen_info.texture.resource.handle;
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- }
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
+ const auto host_ptr{Memory::GetPointer(framebuffer_addr)};
+ rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
+
+ // TODO(Rodrigo): Read this from HLE
+ constexpr u32 block_height_log2 = 4;
+ VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
+ framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
+ gl_framebuffer_data.data(), host_ptr);
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
+
+ // Update existing texture
+ // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
+ // they differ from the LCD resolution.
+ // TODO: Applications could theoretically crash yuzu here by specifying too large
+ // framebuffer sizes. We should make sure that this cannot happen.
+ glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
+ framebuffer.height, screen_info.texture.gl_format,
+ screen_info.texture.gl_type, gl_framebuffer_data.data());
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
/**
@@ -276,22 +274,29 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
texture.height = framebuffer.height;
texture.pixel_format = framebuffer.pixel_format;
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
+
GLint internal_format;
switch (framebuffer.pixel_format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- gl_framebuffer_data.resize(texture.width * texture.height * 4);
+ break;
+ case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ internal_format = GL_RGB565;
+ texture.gl_format = GL_RGB;
+ texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
break;
default:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- gl_framebuffer_data.resize(texture.width * texture.height * 4);
- LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer pixel format: {}",
- static_cast<u32>(framebuffer.pixel_format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
+ static_cast<u32>(framebuffer.pixel_format));
}
texture.resource.Release();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 4aebf2321..9bd086368 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -43,14 +43,13 @@ struct ScreenInfo {
TextureInfo texture;
};
-class RendererOpenGL : public VideoCore::RendererBase {
+class RendererOpenGL final : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
~RendererOpenGL() override;
/// Swap buffers (render frame)
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
/// Initialize the renderer
bool Init() override;
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 8973fbefa..32facd6ba 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -14,6 +14,12 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
+namespace {
+constexpr OperationCode GetFloatSelector(u64 selector) {
+ return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
+}
+} // Anonymous namespace
+
u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2I_R:
case OpCode::Id::I2I_C:
case OpCode::Id::I2I_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.selector);
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.alu.saturate_d);
@@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C:
case OpCode::Id::I2F_IMM: {
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
- UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
@@ -113,8 +119,10 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}();
if (instr.conversion.src_size == Register::Size::Short) {
- // TODO: figure where extract is sey in the encoding
- value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+ value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
+ std::move(value));
+ } else {
+ ASSERT(instr.conversion.float_src.selector == 0);
}
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
@@ -169,8 +177,10 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}();
if (instr.conversion.src_size == Register::Size::Short) {
- // TODO: figure where extract is sey in the encoding
- value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+ value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
+ std::move(value));
+ } else {
+ ASSERT(instr.conversion.float_src.selector == 0);
}
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index f5013e44a..5614e8a0d 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
instr.fset.neg_a != 0);
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 2323052b0..200c2c983 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -16,10 +16,9 @@ using Tegra::Shader::Pred;
u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
- const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
- instr.fsetp.neg_a != 0);
+ Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
+ instr.fsetp.neg_a != 0);
Node op_b = [&]() {
if (instr.is_b_imm) {
return GetImmediate19(instr);
@@ -29,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
- op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);
+ op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
// We can't use the constant predicate as destination.
ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
+ const Node predicate =
+ GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 46e3d5905..59809bcd8 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -14,7 +14,6 @@ using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = [&]() {
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index dd20775d7..25e48fef8 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -16,7 +16,6 @@ using Tegra::Shader::Pred;
u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index febbfeb50..84dbc50fe 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in PSET is not implemented");
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index c50f6354d..4ceb219be 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -445,11 +445,12 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::ABGR8U;
+ case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ return PixelFormat::B5G6R5U;
case Tegra::FramebufferConfig::PixelFormat::BGRA8:
return PixelFormat::BGRA8;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
return PixelFormat::ABGR8U;
}
}
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 358d6757c..e7ef66ee2 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -58,7 +58,6 @@ public:
std::size_t GetHostSizeInBytes() const {
std::size_t host_size_in_bytes;
if (GetCompressionType() == SurfaceCompression::Converted) {
- constexpr std::size_t rgb8_bpp = 4ULL;
// ASTC is uncompressed in software, in emulated as RGBA8
host_size_in_bytes = 0;
for (u32 level = 0; level < num_levels; ++level) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index a3a3770a7..2ec0203d1 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -308,8 +308,6 @@ protected:
if (!guard_render_targets && surface->IsRenderTarget()) {
ManageRenderTargetUnregister(surface);
}
- const GPUVAddr gpu_addr = surface->GetGpuAddr();
- const CacheAddr cache_ptr = surface->GetCacheAddr();
const std::size_t size = surface->GetSizeInBytes();
const VAddr cpu_addr = surface->GetCpuAddr();
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7e8295944..7df5f1452 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -257,19 +257,21 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
- u32 block_height_bit) {
+ u32 block_height_bit, u32 offset_x, u32 offset_y) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
gob_size_x};
for (u32 line = 0; line < subrect_height; ++line) {
+ const u32 dst_y = line + offset_y;
const u32 gob_address_y =
- (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
- ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size;
- const auto& table = legacy_swizzle_table[line % gob_size_y];
+ (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
+ ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
+ const auto& table = legacy_swizzle_table[dst_y % gob_size_y];
for (u32 x = 0; x < subrect_width; ++x) {
+ const u32 dst_x = x + offset_x;
const u32 gob_address =
- gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
- const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
+ gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
+ const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x];
u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
u8* dest_addr = swizzled_data + swizzled_offset;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index eaec9b5a5..f1e3952bc 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
/// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
+ u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+ u32 offset_x, u32 offset_y);
/// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e3be018b9..e36bc2c04 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -213,7 +213,7 @@ struct TICEntry {
if (header_version != TICHeaderVersion::OneDBuffer) {
return width_minus_1 + 1;
}
- return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one;
+ return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
}
u32 Height() const {