aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/command_processor.cpp142
-rw-r--r--src/video_core/engines/maxwell_3d.h2
-rw-r--r--src/video_core/engines/shader_bytecode.h9
-rw-r--r--src/video_core/gpu.cpp8
-rw-r--r--src/video_core/macro_interpreter.cpp2
-rw-r--r--src/video_core/morton.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp116
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp45
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp754
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h3
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp2
-rw-r--r--src/video_core/surface.cpp7
-rw-r--r--src/video_core/surface.h552
-rw-r--r--src/video_core/textures/decoders.cpp2
18 files changed, 836 insertions, 837 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
deleted file mode 100644
index 8b9c548cc..000000000
--- a/src/video_core/command_processor.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <utility>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/microprofile.h"
-#include "common/vector_math.h"
-#include "core/memory.h"
-#include "core/tracer/recorder.h"
-#include "video_core/command_processor.h"
-#include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/kepler_memory.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/maxwell_compute.h"
-#include "video_core/engines/maxwell_dma.h"
-#include "video_core/gpu.h"
-#include "video_core/renderer_base.h"
-#include "video_core/video_core.h"
-
-namespace Tegra {
-
-enum class BufferMethods {
- BindObject = 0,
- CountBufferMethods = 0x40,
-};
-
-MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
-
-void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
- MICROPROFILE_SCOPE(ProcessCommandLists);
-
- // On entering GPU code, assume all memory may be touched by the ARM core.
- maxwell_3d->dirty_flags.OnMemoryWrite();
-
- auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
- LOG_TRACE(HW_GPU,
- "Processing method {:08X} on subchannel {} value "
- "{:08X} remaining params {}",
- method, subchannel, value, remaining_params);
-
- ASSERT(subchannel < bound_engines.size());
-
- if (method == static_cast<u32>(BufferMethods::BindObject)) {
- // Bind the current subchannel to the desired engine id.
- LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
- bound_engines[subchannel] = static_cast<EngineID>(value);
- return;
- }
-
- if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
- // TODO(Subv): Research and implement these methods.
- LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
- return;
- }
-
- const EngineID engine = bound_engines[subchannel];
-
- switch (engine) {
- case EngineID::FERMI_TWOD_A:
- fermi_2d->WriteReg(method, value);
- break;
- case EngineID::MAXWELL_B:
- maxwell_3d->WriteReg(method, value, remaining_params);
- break;
- case EngineID::MAXWELL_COMPUTE_B:
- maxwell_compute->WriteReg(method, value);
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- maxwell_dma->WriteReg(method, value);
- break;
- case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- kepler_memory->WriteReg(method, value);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine");
- }
- };
-
- for (auto entry : commands) {
- Tegra::GPUVAddr address = entry.Address();
- u32 size = entry.sz;
- const std::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
- VAddr current_addr = *head_address;
- while (current_addr < *head_address + size * sizeof(CommandHeader)) {
- const CommandHeader header = {Memory::Read32(current_addr)};
- current_addr += sizeof(u32);
-
- switch (header.mode.Value()) {
- case SubmissionMode::IncreasingOld:
- case SubmissionMode::Increasing: {
- // Increase the method value with each argument.
- for (unsigned i = 0; i < header.arg_count; ++i) {
- WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
- current_addr += sizeof(u32);
- }
- break;
- }
- case SubmissionMode::NonIncreasingOld:
- case SubmissionMode::NonIncreasing: {
- // Use the same method value for all arguments.
- for (unsigned i = 0; i < header.arg_count; ++i) {
- WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
- current_addr += sizeof(u32);
- }
- break;
- }
- case SubmissionMode::IncreaseOnce: {
- ASSERT(header.arg_count.Value() >= 1);
-
- // Use the original method for the first argument and then the next method for all
- // other arguments.
- WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - 1);
- current_addr += sizeof(u32);
-
- for (unsigned i = 1; i < header.arg_count; ++i) {
- WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
- current_addr += sizeof(u32);
- }
- break;
- }
- case SubmissionMode::Inline: {
- // The register value is stored in the bits 16-28 as an immediate
- WriteReg(header.method, header.subchannel, header.inline_data, 0);
- break;
- }
- default:
- UNIMPLEMENTED();
- }
- }
- }
-}
-
-} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 25bb7604a..0faff6fdf 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -164,6 +164,7 @@ public:
return 3;
default:
UNREACHABLE();
+ return 1;
}
}
@@ -871,6 +872,7 @@ public:
return 4;
}
UNREACHABLE();
+ return 1;
}
GPUVAddr StartAddress() const {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index b9faaf8e0..eb703bb5a 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -575,7 +575,7 @@ union Instruction {
union {
BitField<39, 2, u64> tab5cb8_2;
- BitField<41, 3, u64> tab5c68_1;
+ BitField<41, 3, u64> postfactor;
BitField<44, 2, u64> tab5c68_0;
BitField<48, 1, u64> negate_b;
} fmul;
@@ -609,7 +609,7 @@ union Instruction {
BitField<31, 1, u64> negate_b;
BitField<30, 1, u64> abs_b;
- BitField<47, 2, HalfType> type_b;
+ BitField<28, 2, HalfType> type_b;
BitField<35, 2, HalfType> type_c;
} alu_half;
@@ -1049,6 +1049,7 @@ union Instruction {
BitField<49, 1, u64> nodep_flag;
BitField<50, 3, u64> component_mask_selector;
BitField<53, 4, u64> texture_info;
+ BitField<60, 1, u64> fp32_flag;
TextureType GetTextureType() const {
// The TEXS instruction has a weird encoding for the texture type.
@@ -1064,6 +1065,7 @@ union Instruction {
LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
static_cast<u32>(texture_info.Value()));
UNREACHABLE();
+ return TextureType::Texture1D;
}
TextureProcessMode GetTextureProcessMode() const {
@@ -1144,6 +1146,7 @@ union Instruction {
LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
static_cast<u32>(texture_info.Value()));
UNREACHABLE();
+ return TextureType::Texture1D;
}
TextureProcessMode GetTextureProcessMode() const {
@@ -1549,7 +1552,7 @@ private:
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
- INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
+ INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index fd1242333..08cf6268f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -102,6 +102,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
return 1;
default:
UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
+ return 1;
}
}
@@ -119,6 +120,7 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
return 2;
default:
UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format));
+ return 1;
}
}
@@ -141,6 +143,12 @@ void GPU::CallMethod(const MethodCall& method_call) {
return;
}
+ if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+ // TODO(Subv): Research and implement these methods.
+ LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+ return;
+ }
+
const EngineID engine = bound_engines[method_call.subchannel];
switch (engine) {
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 9c55e9f1e..64f75db43 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -171,6 +171,7 @@ u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b)
default:
UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", static_cast<u32>(operation));
+ return 0;
}
}
@@ -268,6 +269,7 @@ bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value)
return value != 0;
}
UNREACHABLE();
+ return true;
}
} // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index a310491a8..47e76d8fe 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -192,6 +192,7 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
return linear_to_morton_fns[static_cast<std::size_t>(format)];
}
UNREACHABLE();
+ return morton_to_linear_fns[static_cast<std::size_t>(format)];
}
/// 8x8 Z-Order coordinate from 2D coordinates
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9e93bd609..2b29fc45f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -79,6 +79,26 @@ struct DrawParameters {
}
};
+struct FramebufferCacheKey {
+ bool is_single_buffer = false;
+ bool stencil_enable = false;
+
+ std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{};
+ u32 colors_count = 0;
+
+ GLuint zeta = 0;
+
+ auto Tie() const {
+ return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count,
+ zeta);
+ }
+
+ bool operator<(const FramebufferCacheKey& rhs) const {
+ return Tie() < rhs.Tie();
+ }
+};
+
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
@@ -90,9 +110,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
OpenGLState::ApplyDefaultState();
- // Create render framebuffer
- framebuffer.Create();
-
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
@@ -361,6 +378,44 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SyncClipEnabled(clip_distances);
}
+void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
+ OpenGLState& current_state) {
+ const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey);
+ auto& framebuffer = entry->second;
+
+ if (is_cache_miss)
+ framebuffer.Create();
+
+ current_state.draw.draw_framebuffer = framebuffer.handle;
+ current_state.ApplyFramebufferState();
+
+ if (!is_cache_miss)
+ return;
+
+ if (fbkey.is_single_buffer) {
+ if (fbkey.color_attachments[0] != GL_NONE) {
+ glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0],
+ 0);
+ }
+ glDrawBuffer(fbkey.color_attachments[0]);
+ } else {
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ if (fbkey.colors[index]) {
+ glFramebufferTexture(GL_DRAW_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+ fbkey.colors[index], 0);
+ }
+ }
+ glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data());
+ }
+
+ if (fbkey.zeta) {
+ GLenum zeta_attachment =
+ fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
+ glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0);
+ }
+}
+
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@@ -444,10 +499,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
// Bind the framebuffer surfaces
- current_state.draw.draw_framebuffer = framebuffer.handle;
- current_state.ApplyFramebufferState();
current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
+ FramebufferCacheKey fbkey;
+
if (using_color_fb) {
if (single_color_target) {
// Used when just a single color attachment is enabled, e.g. for clearing a color buffer
@@ -463,14 +518,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
}
- glFramebufferTexture2D(
- GL_DRAW_FRAMEBUFFER,
- GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
- color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
- glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
+ fbkey.is_single_buffer = true;
+ fbkey.color_attachments[0] =
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);
+ fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0;
} else {
// Multiple color attachments are enabled
- std::array<GLenum, Maxwell::NumRenderTargets> buffers;
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
@@ -485,22 +538,17 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
color_surface->GetSurfaceParams().srgb_conversion;
}
- buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
- glFramebufferTexture2D(
- GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
- GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
- 0);
+ fbkey.color_attachments[index] =
+ GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+ fbkey.colors[index] =
+ color_surface != nullptr ? color_surface->Texture().handle : 0;
}
- glDrawBuffers(regs.rt_control.count, buffers.data());
+ fbkey.is_single_buffer = false;
+ fbkey.colors_count = regs.rt_control.count;
}
} else {
- // No color attachments are enabled - zero out all of them
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
- GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
- 0, 0);
- }
- glDrawBuffer(GL_NONE);
+ // No color attachments are enabled - leave them as zero
+ fbkey.is_single_buffer = true;
}
if (depth_surface) {
@@ -508,22 +556,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
// the shader doesn't actually write to it.
depth_surface->MarkAsModified(true, res_cache);
- if (regs.stencil_enable) {
- // Attach both depth and stencil
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- } else {
- // Attach depth
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- // Clear stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- }
- } else {
- // Clear both depth and stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
+ fbkey.zeta = depth_surface->Texture().handle;
+ fbkey.stencil_enable = regs.stencil_enable;
}
+
+ SetupCachedFramebuffer(fbkey, current_state);
+
SyncViewport(current_state);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 988fa3e27..8a891ffc7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -40,6 +40,7 @@ namespace OpenGL {
struct ScreenInfo;
struct DrawParameters;
+struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
@@ -195,11 +196,12 @@ private:
OGLVertexArray>
vertex_array_cache;
+ std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
+
std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
- OGLFramebuffer framebuffer;
PrimitiveAssembler primitive_assembler{buffer_cache};
GLint uniform_buffer_alignment;
@@ -214,6 +216,8 @@ private:
void SetupShaders(GLenum primitive_mode);
+ void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state);
+
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 5f4cdd119..7ea07631a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -101,8 +101,18 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
params.srgb_conversion);
+
+ if (params.pixel_format == PixelFormat::R16U && config.tsc.depth_compare_enabled) {
+ // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled,
+ // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also
+ // causes GetFormatType to properly return 'Depth' below).
+ params.pixel_format = PixelFormat::Z16;
+ }
+
params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
params.type = GetFormatType(params.pixel_format);
+ UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled);
+
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
params.unaligned_height = config.tic.Height();
@@ -257,7 +267,7 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
- {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 038b25c75..aea6bf1af 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -2,7 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <boost/functional/hash.hpp>
#include "common/assert.h"
+#include "common/hash.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
@@ -66,14 +68,17 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
// stage here.
setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
case Maxwell::ShaderProgram::VertexB:
+ CalculateProperties();
program_result = GLShader::GenerateVertexShader(setup);
gl_type = GL_VERTEX_SHADER;
break;
case Maxwell::ShaderProgram::Geometry:
+ CalculateProperties();
program_result = GLShader::GenerateGeometryShader(setup);
gl_type = GL_GEOMETRY_SHADER;
break;
case Maxwell::ShaderProgram::Fragment:
+ CalculateProperties();
program_result = GLShader::GenerateFragmentShader(setup);
gl_type = GL_FRAGMENT_SHADER;
break;
@@ -140,6 +145,46 @@ GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
return target_program.handle;
};
+static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+ // sched instructions appear once every 4 instructions.
+ static constexpr std::size_t SchedPeriod = 4;
+ const std::size_t absolute_offset = offset - main_offset;
+ return (absolute_offset % SchedPeriod) == 0;
+}
+
+static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+ constexpr std::size_t start_offset = 10;
+ std::size_t offset = start_offset;
+ std::size_t size = start_offset * sizeof(u64);
+ while (offset < program.size()) {
+ const u64 inst = program[offset];
+ if (!IsSchedInstruction(offset, start_offset)) {
+ if (inst == 0 || (inst >> 52) == 0x50b) {
+ break;
+ }
+ }
+ size += sizeof(inst);
+ offset++;
+ }
+ return size;
+}
+
+void CachedShader::CalculateProperties() {
+ setup.program.real_size = CalculateProgramSize(setup.program.code);
+ setup.program.real_size_b = 0;
+ setup.program.unique_identifier = Common::CityHash64(
+ reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size);
+ if (program_type == Maxwell::ShaderProgram::VertexA) {
+ std::size_t seed = 0;
+ boost::hash_combine(seed, setup.program.unique_identifier);
+ setup.program.real_size_b = CalculateProgramSize(setup.program.code_b);
+ const u64 identifier_b = Common::CityHash64(
+ reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b);
+ boost::hash_combine(seed, identifier_b);
+ setup.program.unique_identifier = static_cast<u64>(seed);
+ }
+}
+
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 08f470de3..de3671acf 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -67,6 +67,7 @@ public:
6, "ShaderTrianglesAdjacency");
default:
UNREACHABLE_MSG("Unknown primitive mode.");
+ return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
}
}
@@ -81,6 +82,8 @@ private:
GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
u32 max_vertices, const std::string& debug_name);
+ void CalculateProperties();
+
VAddr addr;
std::size_t shader_length;
Maxwell::ShaderProgram program_type;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d235bfcd4..4e685fa2c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -50,6 +50,14 @@ public:
using std::runtime_error::runtime_error;
};
+/// Generates code to use for a swizzle operation.
+static std::string GetSwizzle(u64 elem) {
+ ASSERT(elem <= 3);
+ std::string swizzle = ".";
+ swizzle += "xyzw"[elem];
+ return swizzle;
+}
+
/// Translate topology
static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
@@ -356,6 +364,7 @@ public:
return value;
default:
UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
+ return value;
}
}
@@ -618,6 +627,7 @@ public:
return "floatBitsToInt(" + value + ')';
} else {
UNREACHABLE();
+ return value;
}
}
@@ -920,7 +930,7 @@ private:
case Attribute::Index::FrontFacing:
// TODO(Subv): Find out what the values are for the other elements.
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
- return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))";
+ return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -1004,14 +1014,6 @@ private:
}
}
- /// Generates code to use for a swizzle operation.
- static std::string GetSwizzle(u64 elem) {
- ASSERT(elem <= 3);
- std::string swizzle = ".";
- swizzle += "xyzw"[elem];
- return swizzle;
- }
-
ShaderWriter& shader;
ShaderWriter& declarations;
std::vector<GLSLRegister> regs;
@@ -1343,7 +1345,7 @@ private:
regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
}
- void WriteTexsInstruction(const Instruction& instr, const std::string& texture) {
+ void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
@@ -1368,6 +1370,38 @@ private:
}
}
+ void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) {
+ // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
+ // float instruction).
+
+ std::array<std::string, 4> components;
+ u32 written_components = 0;
+
+ for (u32 component = 0; component < 4; ++component) {
+ if (!instr.texs.IsComponentEnabled(component))
+ continue;
+ components[written_components++] = texture + GetSwizzle(component);
+ }
+ if (written_components == 0)
+ return;
+
+ const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) {
+ return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')';
+ };
+
+ regs.SetRegisterToHalfFloat(
+ instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1),
+ Tegra::Shader::HalfMerge::H0_H1, 1, 1);
+
+ if (written_components > 2) {
+ ASSERT(instr.texs.HasTwoDestinations());
+ regs.SetRegisterToHalfFloat(
+ instr.gpr28, 0,
+ BuildComponent(components[2], components[3], written_components > 3),
+ Tegra::Shader::HalfMerge::H0_H1, 1, 1);
+ }
+ }
+
static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
switch (texture_type) {
case Tegra::Shader::TextureType::Texture1D:
@@ -1515,6 +1549,252 @@ private:
}
}
+ std::pair<size_t, std::string> ValidateAndGetCoordinateElement(
+ const Tegra::Shader::TextureType texture_type, const bool depth_compare,
+ const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) {
+ const size_t coord_count = TextureCoordinates(texture_type);
+
+ size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
+ const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
+ if (total_coord_count > max_coords || total_reg_count > max_inputs) {
+ UNIMPLEMENTED_MSG("Unsupported Texture operation");
+ total_coord_count = std::min(total_coord_count, max_coords);
+ }
+ // 1D.DC opengl is using a vec3 but 2nd component is ignored later.
+ total_coord_count +=
+ (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D)
+ ? 1
+ : 0;
+
+ constexpr std::array<const char*, 5> coord_container{
+ {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(",
+ "vec4 coord = vec4("}};
+
+ return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]);
+ }
+
+ std::string GetTextureCode(const Tegra::Shader::Instruction& instr,
+ const Tegra::Shader::TextureType texture_type,
+ const Tegra::Shader::TextureProcessMode process_mode,
+ const bool depth_compare, const bool is_array,
+ const size_t bias_offset) {
+
+ if ((texture_type == Tegra::Shader::TextureType::Texture3D &&
+ (is_array || depth_compare)) ||
+ (texture_type == Tegra::Shader::TextureType::TextureCube && is_array &&
+ depth_compare)) {
+ UNIMPLEMENTED_MSG("This method is not supported.");
+ }
+
+ const std::string sampler =
+ GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+ const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ ||
+ process_mode == Tegra::Shader::TextureProcessMode::LL ||
+ process_mode == Tegra::Shader::TextureProcessMode::LLA;
+
+ // LOD selection (either via bias or explicit textureLod) not supported in GL for
+ // sampler2DArrayShadow and samplerCubeArrayShadow.
+ const bool gl_lod_supported = !(
+ (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
+ (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
+
+ const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture(";
+ std::string texture = read_method + sampler + ", coord";
+
+ UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None &&
+ !gl_lod_supported);
+
+ if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) {
+ if (process_mode == Tegra::Shader::TextureProcessMode::LZ) {
+ texture += ", 0.0";
+ } else {
+ // If present, lod or bias are always stored in the register indexed by the
+ // gpr20
+ // field with an offset depending on the usage of the other registers
+ texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset);
+ }
+ }
+ texture += ")";
+ return texture;
+ }
+
+ std::pair<std::string, std::string> GetTEXCode(
+ const Instruction& instr, const Tegra::Shader::TextureType texture_type,
+ const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
+ const bool is_array) {
+ const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
+ process_mode != Tegra::Shader::TextureProcessMode::LZ);
+
+ const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
+ texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
+ // If enabled arrays index is always stored in the gpr8 field
+ const u64 array_register = instr.gpr8.Value();
+ // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+ const u64 coord_register = array_register + (is_array ? 1 : 0);
+
+ std::string coord = coord_dcl;
+ for (size_t i = 0; i < coord_count;) {
+ coord += regs.GetRegisterAsFloat(coord_register + i);
+ ++i;
+ if (i != coord_count) {
+ coord += ',';
+ }
+ }
+ // 1D.DC in opengl the 2nd component is ignored.
+ if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) {
+ coord += ",0.0";
+ }
+ if (is_array) {
+ coord += ',' + regs.GetRegisterAsInteger(array_register);
+ }
+ if (depth_compare) {
+ // Depth is always stored in the register signaled by gpr20
+ // or in the next register if lod or bias are used
+ const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+ coord += ',' + regs.GetRegisterAsFloat(depth_register);
+ }
+ coord += ");";
+ return std::make_pair(
+ coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0));
+ }
+
+ std::pair<std::string, std::string> GetTEXSCode(
+ const Instruction& instr, const Tegra::Shader::TextureType texture_type,
+ const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
+ const bool is_array) {
+ const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
+ process_mode != Tegra::Shader::TextureProcessMode::LZ);
+
+ const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
+ texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
+ // If enabled arrays index is always stored in the gpr8 field
+ const u64 array_register = instr.gpr8.Value();
+ // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
+ const u64 coord_register = array_register + (is_array ? 1 : 0);
+ const u64 last_coord_register =
+ (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
+ ? static_cast<u64>(instr.gpr20.Value())
+ : coord_register + 1;
+
+ std::string coord = coord_dcl;
+ for (size_t i = 0; i < coord_count; ++i) {
+ const bool last = (i == (coord_count - 1)) && (coord_count > 1);
+ coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i);
+ if (i < coord_count - 1) {
+ coord += ',';
+ }
+ }
+
+ if (is_array) {
+ coord += ',' + regs.GetRegisterAsInteger(array_register);
+ }
+ if (depth_compare) {
+ // Depth is always stored in the register signaled by gpr20
+ // or in the next register if lod or bias are used
+ const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+ coord += ',' + regs.GetRegisterAsFloat(depth_register);
+ }
+ coord += ");";
+
+ return std::make_pair(coord,
+ GetTextureCode(instr, texture_type, process_mode, depth_compare,
+ is_array, (coord_count > 2 ? 1 : 0)));
+ }
+
+ std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr,
+ const Tegra::Shader::TextureType texture_type,
+ const bool depth_compare, const bool is_array) {
+
+ const size_t coord_count = TextureCoordinates(texture_type);
+ const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
+ const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
+
+ constexpr std::array<const char*, 5> coord_container{
+ {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}};
+
+ // If enabled arrays index is always stored in the gpr8 field
+ const u64 array_register = instr.gpr8.Value();
+ // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+ const u64 coord_register = array_register + (is_array ? 1 : 0);
+
+ std::string coord = coord_container[total_coord_count];
+ for (size_t i = 0; i < coord_count;) {
+ coord += regs.GetRegisterAsFloat(coord_register + i);
+ ++i;
+ if (i != coord_count) {
+ coord += ',';
+ }
+ }
+
+ if (is_array) {
+ coord += ',' + regs.GetRegisterAsInteger(array_register);
+ }
+ coord += ");";
+
+ const std::string sampler =
+ GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+ std::string texture = "textureGather(" + sampler + ", coord, ";
+ if (depth_compare) {
+ // Depth is always stored in the register signaled by gpr20
+ texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')';
+ } else {
+ texture += std::to_string(instr.tld4.component) + ')';
+ }
+ return std::make_pair(coord, texture);
+ }
+
+ std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr,
+ const Tegra::Shader::TextureType texture_type,
+ const bool is_array) {
+
+ const size_t coord_count = TextureCoordinates(texture_type);
+ const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
+ const bool lod_enabled =
+ instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL;
+
+ constexpr std::array<const char*, 4> coord_container{
+ {"", "int coord = (", "ivec2 coord = ivec2(", "ivec3 coord = ivec3("}};
+
+ std::string coord = coord_container[total_coord_count];
+
+ // If enabled arrays index is always stored in the gpr8 field
+ const u64 array_register = instr.gpr8.Value();
+
+ // if is array gpr20 is used
+ const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
+
+ const u64 last_coord_register =
+ ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array
+ ? static_cast<u64>(instr.gpr20.Value())
+ : coord_register + 1;
+
+ for (size_t i = 0; i < coord_count; ++i) {
+ const bool last = (i == (coord_count - 1)) && (coord_count > 1);
+ coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i);
+ if (i < coord_count - 1) {
+ coord += ',';
+ }
+ }
+ if (is_array) {
+ coord += ',' + regs.GetRegisterAsInteger(array_register);
+ }
+ coord += ");";
+
+ const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+
+ std::string texture = "texelFetch(" + sampler + ", coords";
+
+ if (lod_enabled) {
+ // When lod is used always is in grp20
+ texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')';
+ } else {
+ texture += ", 0)";
+ }
+ return std::make_pair(coord, texture);
+ }
+
/**
* Compiles a single instruction from Tegra to GLSL.
* @param offset the offset of the Tegra shader instruction.
@@ -1587,9 +1867,6 @@ private:
UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0,
"FMUL tab5cb8_2({}) is not implemented",
instr.fmul.tab5cb8_2.Value());
- UNIMPLEMENTED_IF_MSG(instr.fmul.tab5c68_1 != 0,
- "FMUL tab5cb8_1({}) is not implemented",
- instr.fmul.tab5c68_1.Value());
UNIMPLEMENTED_IF_MSG(
instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
instr.fmul.tab5c68_0
@@ -1599,7 +1876,26 @@ private:
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
+ std::string postfactor_op;
+ if (instr.fmul.postfactor != 0) {
+ s8 postfactor = static_cast<s8>(instr.fmul.postfactor);
+
+ // postfactor encoded as 3-bit 1's complement in instruction,
+ // interpreted with below logic.
+ if (postfactor >= 4) {
+ postfactor = 7 - postfactor;
+ } else {
+ postfactor = 0 - postfactor;
+ }
+
+ if (postfactor > 0) {
+ postfactor_op = " * " + std::to_string(1 << postfactor);
+ } else {
+ postfactor_op = " / " + std::to_string(1 << -postfactor);
+ }
+ }
+
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1,
instr.alu.saturate_d, 0, true);
break;
}
@@ -1768,6 +2064,8 @@ private:
std::to_string(instr.alu.GetSignedImm20_20())};
default:
UNREACHABLE();
+ return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
+ std::to_string(instr.alu.GetSignedImm20_20())};
}
}();
const std::string offset = '(' + packed_shift + " & 0xff)";
@@ -2574,168 +2872,32 @@ private:
case OpCode::Id::TEX: {
Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
-
+ const bool depth_compare =
+ instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
+ const auto process_mode = instr.tex.GetTextureProcessMode();
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- const bool depth_compare =
- instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
- u32 num_coordinates = TextureCoordinates(texture_type);
- u32 start_index = 0;
- std::string array_elem;
- if (is_array) {
- array_elem = regs.GetRegisterAsInteger(instr.gpr8);
- start_index = 1;
- }
- const auto process_mode = instr.tex.GetTextureProcessMode();
- u32 start_index_b = 0;
- std::string lod_value;
- if (process_mode != Tegra::Shader::TextureProcessMode::LZ &&
- process_mode != Tegra::Shader::TextureProcessMode::None) {
- start_index_b = 1;
- lod_value = regs.GetRegisterAsFloat(instr.gpr20);
- }
-
- std::string depth_value;
- if (depth_compare) {
- depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b);
- }
-
- bool depth_compare_extra = false;
+ const auto [coord, texture] =
+ GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array);
const auto scope = shader.Scope();
-
- switch (num_coordinates) {
- case 1: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
- if (is_array) {
- if (depth_compare) {
- shader.AddLine("vec3 coords = vec3(" + x + ", " + depth_value + ", " +
- array_elem + ");");
- } else {
- shader.AddLine("vec2 coords = vec2(" + x + ", " + array_elem + ");");
- }
- } else {
- if (depth_compare) {
- shader.AddLine("vec2 coords = vec2(" + x + ", " + depth_value + ");");
- } else {
- shader.AddLine("float coords = " + x + ';');
- }
- }
- break;
- }
- case 2: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
- if (is_array) {
- if (depth_compare) {
- shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " +
- depth_value + ", " + array_elem + ");");
- } else {
- shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " +
- array_elem + ");");
- }
- } else {
- if (depth_compare) {
- shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " +
- depth_value + ");");
- } else {
- shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
- }
- }
- break;
- }
- case 3: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
- const std::string z =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2);
- if (is_array) {
- depth_compare_extra = depth_compare;
- shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
- array_elem + ");");
- } else {
- if (depth_compare) {
- shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
- depth_value + ");");
- } else {
- shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
- }
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
- static_cast<u32>(num_coordinates));
-
- // Fallback to interpreting as a 2D texture for now
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
- texture_type = Tegra::Shader::TextureType::Texture2D;
- }
-
- const std::string sampler =
- GetSampler(instr.sampler, texture_type, is_array, depth_compare);
- // Add an extra scope and declare the texture coords inside to prevent
- // overwriting them in case they are used as outputs of the texs instruction.
-
- const std::string texture = [&]() {
- switch (instr.tex.GetTextureProcessMode()) {
- case Tegra::Shader::TextureProcessMode::None:
- if (depth_compare_extra) {
- return "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- return "texture(" + sampler + ", coords)";
- case Tegra::Shader::TextureProcessMode::LZ:
- if (depth_compare_extra) {
- return "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- return "textureLod(" + sampler + ", coords, 0.0)";
- case Tegra::Shader::TextureProcessMode::LB:
- case Tegra::Shader::TextureProcessMode::LBA:
- // TODO: Figure if A suffix changes the equation at all.
- if (depth_compare_extra) {
- LOG_WARNING(
- HW_GPU,
- "OpenGL Limitation: can't set bias value along depth compare");
- return "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- return "texture(" + sampler + ", coords, " + lod_value + ')';
- case Tegra::Shader::TextureProcessMode::LL:
- case Tegra::Shader::TextureProcessMode::LLA:
- // TODO: Figure if A suffix changes the equation at all.
- if (depth_compare_extra) {
- LOG_WARNING(
- HW_GPU,
- "OpenGL Limitation: can't set lod value along depth compare");
- return "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- return "textureLod(" + sampler + ", coords, " + lod_value + ')';
- default:
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.tex.GetTextureProcessMode()));
- if (depth_compare_extra) {
- return "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- return "texture(" + sampler + ", coords)";
- }
- }();
+ shader.AddLine(coord);
if (depth_compare) {
regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
} else {
+ shader.AddLine("vec4 texture_tmp = " + texture + ';');
std::size_t dest_elem{};
for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
- regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
+ regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
+ dest_elem);
++dest_elem;
}
}
@@ -2743,138 +2905,37 @@ private:
}
case OpCode::Id::TEXS: {
Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
- bool is_array{instr.texs.IsArrayTexture()};
+ const bool is_array{instr.texs.IsArrayTexture()};
+ const bool depth_compare =
+ instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
+ const auto process_mode = instr.texs.GetTextureProcessMode();
UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
const auto scope = shader.Scope();
- const bool depth_compare =
- instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
- u32 num_coordinates = TextureCoordinates(texture_type);
- const auto process_mode = instr.texs.GetTextureProcessMode();
- u32 lod_offset = 0;
- if (process_mode == Tegra::Shader::TextureProcessMode::LL) {
- if (num_coordinates > 2) {
- shader.AddLine("float lod_value = " +
- regs.GetRegisterAsFloat(instr.gpr20.Value() + 1) + ';');
- lod_offset = 2;
- } else {
- shader.AddLine("float lod_value = " + regs.GetRegisterAsFloat(instr.gpr20) +
- ';');
- lod_offset = 1;
- }
- }
+ auto [coord, texture] =
+ GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array);
- switch (num_coordinates) {
- case 1: {
- shader.AddLine("float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';');
- break;
- }
- case 2: {
- if (is_array) {
- if (depth_compare) {
- const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
- shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
- index + ");");
- } else {
- const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + index +
- ");");
- }
- } else {
- if (lod_offset != 0) {
- if (depth_compare) {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string z =
- regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset);
- shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z +
- ");");
- } else {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
- }
- } else {
- if (depth_compare) {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
- shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z +
- ");");
- } else {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
- }
- }
- }
- break;
- }
- case 3: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
- shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
- static_cast<u32>(num_coordinates));
+ shader.AddLine(coord);
- // Fallback to interpreting as a 2D texture for now
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
- texture_type = Tegra::Shader::TextureType::Texture2D;
- is_array = false;
- }
- const std::string sampler =
- GetSampler(instr.sampler, texture_type, is_array, depth_compare);
-
- std::string texture = [&]() {
- switch (process_mode) {
- case Tegra::Shader::TextureProcessMode::None:
- return "texture(" + sampler + ", coords)";
- case Tegra::Shader::TextureProcessMode::LZ:
- if (depth_compare && is_array) {
- return "texture(" + sampler + ", coords)";
- } else {
- return "textureLod(" + sampler + ", coords, 0.0)";
- }
- break;
- case Tegra::Shader::TextureProcessMode::LL:
- return "textureLod(" + sampler + ", coords, lod_value)";
- default:
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.texs.GetTextureProcessMode()));
- return "texture(" + sampler + ", coords)";
- }
- }();
if (depth_compare) {
texture = "vec4(" + texture + ')';
}
+ shader.AddLine("vec4 texture_tmp = " + texture + ';');
- WriteTexsInstruction(instr, texture);
+ if (instr.texs.fp32_flag) {
+ WriteTexsInstructionFloat(instr, "texture_tmp");
+ } else {
+ WriteTexsInstructionHalfFloat(instr, "texture_tmp");
+ }
break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
- ASSERT(texture_type == Tegra::Shader::TextureType::Texture2D);
- ASSERT(is_array == false);
-
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
@@ -2882,54 +2943,16 @@ private:
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
"MZ is not implemented");
- u32 extra_op_offset = 0;
-
- ShaderScopedScope scope = shader.Scope();
-
- switch (texture_type) {
- case Tegra::Shader::TextureType::Texture1D: {
- const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
- shader.AddLine("float coords = " + x + ';');
- break;
- }
- case Tegra::Shader::TextureType::Texture2D: {
- UNIMPLEMENTED_IF_MSG(is_array, "Unhandled 2d array texture");
-
- const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
- // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
- shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
- extra_op_offset = 1;
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
- }
- const std::string sampler =
- GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array);
- const std::string texture = [&]() {
- switch (instr.tlds.GetTextureProcessMode()) {
- case Tegra::Shader::TextureProcessMode::LZ:
- return "texelFetch(" + sampler + ", coords, 0)";
- case Tegra::Shader::TextureProcessMode::LL:
- shader.AddLine(
- "float lod = " +
- regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';');
- return "texelFetch(" + sampler + ", coords, lod)";
- default:
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.tlds.GetTextureProcessMode()));
- return "texelFetch(" + sampler + ", coords, 0)";
- }
- }();
+ const auto scope = shader.Scope();
- WriteTexsInstruction(instr, texture);
+ shader.AddLine(coord);
+ shader.AddLine("vec4 texture_tmp = " + texture + ';');
+ WriteTexsInstructionFloat(instr, "texture_tmp");
break;
}
case OpCode::Id::TLD4: {
- ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
- ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
@@ -2939,56 +2962,29 @@ private:
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
"PTP is not implemented");
+
+ auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare =
instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
- auto texture_type = instr.tld4.texture_type.Value();
- u32 num_coordinates = TextureCoordinates(texture_type);
- if (depth_compare)
- num_coordinates += 1;
+ const bool is_array = instr.tld4.array != 0;
- const auto scope = shader.Scope();
-
- switch (num_coordinates) {
- case 2: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
- break;
- }
- case 3: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
- shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
- static_cast<u32>(num_coordinates));
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
- texture_type = Tegra::Shader::TextureType::Texture2D;
- }
+ const auto [coord, texture] =
+ GetTLD4Code(instr, texture_type, depth_compare, is_array);
- const std::string sampler =
- GetSampler(instr.sampler, texture_type, false, depth_compare);
+ const auto scope = shader.Scope();
- const std::string texture = "textureGather(" + sampler + ", coords, " +
- std::to_string(instr.tld4.component) + ')';
+ shader.AddLine(coord);
+ std::size_t dest_elem{};
- if (depth_compare) {
- regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
- } else {
- std::size_t dest_elem{};
- for (std::size_t elem = 0; elem < 4; ++elem) {
- if (!instr.tex.IsComponentEnabled(elem)) {
- // Skip disabled components
- continue;
- }
- regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
- ++dest_elem;
+ shader.AddLine("vec4 texture_tmp = " + texture + ';');
+ for (std::size_t elem = 0; elem < 4; ++elem) {
+ if (!instr.tex.IsComponentEnabled(elem)) {
+ // Skip disabled components
+ continue;
}
+ regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
+ dest_elem);
+ ++dest_elem;
}
break;
}
@@ -3002,27 +2998,31 @@ private:
const auto scope = shader.Scope();
+ std::string coords;
+
const bool depth_compare =
instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
- // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+
const std::string sampler = GetSampler(
instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
- if (depth_compare) {
- // Note: TLD4S coordinate encoding works just like TEXS's
- const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- shader.AddLine("vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");");
- } else {
- shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");");
- }
- std::string texture = "textureGather(" + sampler + ", coords, " +
- std::to_string(instr.tld4s.component) + ')';
- if (depth_compare) {
- texture = "vec4(" + texture + ')';
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ coords = "vec2 coords = vec2(" + op_a + ", ";
+ std::string texture = "textureGather(" + sampler + ", coords, ";
+
+ if (!depth_compare) {
+ const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+ coords += op_b + ");";
+ texture += std::to_string(instr.tld4s.component) + ')';
+ } else {
+ const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
+ coords += op_b + ");";
+ texture += op_c + ')';
}
- WriteTexsInstruction(instr, texture);
+ shader.AddLine(coords);
+ shader.AddLine("vec4 texture_tmp = " + texture + ';');
+ WriteTexsInstructionFloat(instr, "texture_tmp");
break;
}
case OpCode::Id::TXQ: {
@@ -3316,6 +3316,7 @@ private:
return std::to_string(instr.r2p.immediate_mask);
default:
UNREACHABLE();
+ return std::to_string(instr.r2p.immediate_mask);
}
}();
const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
@@ -3779,7 +3780,10 @@ private:
}
break;
}
- default: { UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); }
+ default: {
+ UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
+ break;
+ }
}
break;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 23ed91e27..5d0819dc5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <fmt/format.h>
#include "common/assert.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -16,6 +17,8 @@ static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+ const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+ out += "// Shader Unique Id: VS" + id + "\n\n";
out += Decompiler::GetCommonDeclarations();
out += R"(
@@ -84,6 +87,8 @@ void main() {
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
// Version is intentionally skipped in shader generation, it's added by the lazy compilation.
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
+ const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+ out += "// Shader Unique Id: GS" + id + "\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_geometry();\n";
@@ -117,6 +122,8 @@ void main() {
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+ const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+ out += "// Shader Unique Id: FS" + id + "\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_fragment();\n";
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 4fa6d7612..fcc20d3b4 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -177,6 +177,9 @@ struct ShaderSetup {
struct {
ProgramCode code;
ProgramCode code_b; // Used for dual vertex shaders
+ u64 unique_identifier;
+ std::size_t real_size;
+ std::size_t real_size_b;
} program;
/// Used in scenarios where we have a dual vertex shaders
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 4fd0d66c5..f02415139 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -427,6 +427,7 @@ static const char* GetSource(GLenum source) {
RET(OTHER);
default:
UNREACHABLE();
+ return "Unknown source";
}
#undef RET
}
@@ -445,6 +446,7 @@ static const char* GetType(GLenum type) {
RET(MARKER);
default:
UNREACHABLE();
+ return "Unknown type";
}
#undef RET
}
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 9582dd2ca..a97b1562b 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -65,6 +65,7 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
+ return PixelFormat::S8Z24;
}
}
@@ -141,6 +142,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
+ return PixelFormat::RGBA8_SRGB;
}
}
@@ -327,6 +329,7 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
static_cast<u32>(component_type));
UNREACHABLE();
+ return PixelFormat::ABGR8U;
}
}
@@ -346,6 +349,7 @@ ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
default:
LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
UNREACHABLE();
+ return ComponentType::UNorm;
}
}
@@ -393,6 +397,7 @@ ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
+ return ComponentType::UNorm;
}
}
@@ -403,6 +408,7 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
+ return PixelFormat::ABGR8U;
}
}
@@ -418,6 +424,7 @@ ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
+ return ComponentType::UNorm;
}
}
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 0dd3eb2e4..e23cfecbc 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -125,6 +125,75 @@ enum class SurfaceTarget {
TextureCubeArray,
};
+constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // G8R8U
+ 1, // G8R8S
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
+ 4, // ASTC_2D_8X8
+ 4, // ASTC_2D_8X5
+ 4, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 4, // ASTC_2D_4X4_SRGB
+ 4, // ASTC_2D_8X8_SRGB
+ 4, // ASTC_2D_8X5_SRGB
+ 4, // ASTC_2D_5X4_SRGB
+ 4, // ASTC_2D_5X5
+ 4, // ASTC_2D_5X5_SRGB
+ 4, // ASTC_2D_10X8
+ 4, // ASTC_2D_10X8_SRGB
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
+}};
+
/**
* Gets the compression factor for the specified PixelFormat. This applies to just the
* "compressed width" and "compressed height", not the overall compression factor of a
@@ -135,304 +204,237 @@ static constexpr u32 GetCompressionFactor(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
- constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // G8R8U
- 1, // G8R8S
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG32UI
- 1, // R32UI
- 4, // ASTC_2D_8X8
- 4, // ASTC_2D_8X5
- 4, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 4, // ASTC_2D_4X4_SRGB
- 4, // ASTC_2D_8X8_SRGB
- 4, // ASTC_2D_8X5_SRGB
- 4, // ASTC_2D_5X4_SRGB
- 4, // ASTC_2D_5X5
- 4, // ASTC_2D_5X5_SRGB
- 4, // ASTC_2D_10X8
- 4, // ASTC_2D_10X8_SRGB
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
- }};
-
ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
return compression_factor_table[static_cast<std::size_t>(format)];
}
+constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // G8R8U
+ 1, // G8R8S
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
+ 8, // ASTC_2D_8X8
+ 8, // ASTC_2D_8X5
+ 5, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 4, // ASTC_2D_4X4_SRGB
+ 8, // ASTC_2D_8X8_SRGB
+ 8, // ASTC_2D_8X5_SRGB
+ 5, // ASTC_2D_5X4_SRGB
+ 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_SRGB
+ 10, // ASTC_2D_10X8
+ 10, // ASTC_2D_10X8_SRGB
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
+}};
+
static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
- constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // G8R8U
- 1, // G8R8S
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG32UI
- 1, // R32UI
- 8, // ASTC_2D_8X8
- 8, // ASTC_2D_8X5
- 5, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 4, // ASTC_2D_4X4_SRGB
- 8, // ASTC_2D_8X8_SRGB
- 8, // ASTC_2D_8X5_SRGB
- 5, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
- 5, // ASTC_2D_5X5_SRGB
- 10, // ASTC_2D_10X8
- 10, // ASTC_2D_10X8_SRGB
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
- }};
+
ASSERT(static_cast<std::size_t>(format) < block_width_table.size());
return block_width_table[static_cast<std::size_t>(format)];
}
+constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // G8R8U
+ 1, // G8R8S
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
+ 8, // ASTC_2D_8X8
+ 5, // ASTC_2D_8X5
+ 4, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 4, // ASTC_2D_4X4_SRGB
+ 8, // ASTC_2D_8X8_SRGB
+ 5, // ASTC_2D_8X5_SRGB
+ 4, // ASTC_2D_5X4_SRGB
+ 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_SRGB
+ 8, // ASTC_2D_10X8
+ 8, // ASTC_2D_10X8_SRGB
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
+}};
+
static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
- constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // G8R8U
- 1, // G8R8S
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG32UI
- 1, // R32UI
- 8, // ASTC_2D_8X8
- 5, // ASTC_2D_8X5
- 4, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 4, // ASTC_2D_4X4_SRGB
- 8, // ASTC_2D_8X8_SRGB
- 5, // ASTC_2D_8X5_SRGB
- 4, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
- 5, // ASTC_2D_5X5_SRGB
- 8, // ASTC_2D_10X8
- 8, // ASTC_2D_10X8_SRGB
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
- }};
-
ASSERT(static_cast<std::size_t>(format) < block_height_table.size());
return block_height_table[static_cast<std::size_t>(format)];
}
+constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
+ 32, // ABGR8U
+ 32, // ABGR8S
+ 32, // ABGR8UI
+ 16, // B5G6R5U
+ 32, // A2B10G10R10U
+ 16, // A1B5G5R5U
+ 8, // R8U
+ 8, // R8UI
+ 64, // RGBA16F
+ 64, // RGBA16U
+ 64, // RGBA16UI
+ 32, // R11FG11FB10F
+ 128, // RGBA32UI
+ 64, // DXT1
+ 128, // DXT23
+ 128, // DXT45
+ 64, // DXN1
+ 128, // DXN2UNORM
+ 128, // DXN2SNORM
+ 128, // BC7U
+ 128, // BC6H_UF16
+ 128, // BC6H_SF16
+ 128, // ASTC_2D_4X4
+ 16, // G8R8U
+ 16, // G8R8S
+ 32, // BGRA8
+ 128, // RGBA32F
+ 64, // RG32F
+ 32, // R32F
+ 16, // R16F
+ 16, // R16U
+ 16, // R16S
+ 16, // R16UI
+ 16, // R16I
+ 32, // RG16
+ 32, // RG16F
+ 32, // RG16UI
+ 32, // RG16I
+ 32, // RG16S
+ 96, // RGB32F
+ 32, // RGBA8_SRGB
+ 16, // RG8U
+ 16, // RG8S
+ 64, // RG32UI
+ 32, // R32UI
+ 128, // ASTC_2D_8X8
+ 128, // ASTC_2D_8X5
+ 128, // ASTC_2D_5X4
+ 32, // BGRA8_SRGB
+ 64, // DXT1_SRGB
+ 128, // DXT23_SRGB
+ 128, // DXT45_SRGB
+ 128, // BC7U
+ 128, // ASTC_2D_4X4_SRGB
+ 128, // ASTC_2D_8X8_SRGB
+ 128, // ASTC_2D_8X5_SRGB
+ 128, // ASTC_2D_5X4_SRGB
+ 128, // ASTC_2D_5X5
+ 128, // ASTC_2D_5X5_SRGB
+ 128, // ASTC_2D_10X8
+ 128, // ASTC_2D_10X8_SRGB
+ 32, // Z32F
+ 16, // Z16
+ 32, // Z24S8
+ 32, // S8Z24
+ 64, // Z32FS8
+}};
+
static constexpr u32 GetFormatBpp(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
- constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
- 32, // ABGR8U
- 32, // ABGR8S
- 32, // ABGR8UI
- 16, // B5G6R5U
- 32, // A2B10G10R10U
- 16, // A1B5G5R5U
- 8, // R8U
- 8, // R8UI
- 64, // RGBA16F
- 64, // RGBA16U
- 64, // RGBA16UI
- 32, // R11FG11FB10F
- 128, // RGBA32UI
- 64, // DXT1
- 128, // DXT23
- 128, // DXT45
- 64, // DXN1
- 128, // DXN2UNORM
- 128, // DXN2SNORM
- 128, // BC7U
- 128, // BC6H_UF16
- 128, // BC6H_SF16
- 128, // ASTC_2D_4X4
- 16, // G8R8U
- 16, // G8R8S
- 32, // BGRA8
- 128, // RGBA32F
- 64, // RG32F
- 32, // R32F
- 16, // R16F
- 16, // R16U
- 16, // R16S
- 16, // R16UI
- 16, // R16I
- 32, // RG16
- 32, // RG16F
- 32, // RG16UI
- 32, // RG16I
- 32, // RG16S
- 96, // RGB32F
- 32, // RGBA8_SRGB
- 16, // RG8U
- 16, // RG8S
- 64, // RG32UI
- 32, // R32UI
- 128, // ASTC_2D_8X8
- 128, // ASTC_2D_8X5
- 128, // ASTC_2D_5X4
- 32, // BGRA8_SRGB
- 64, // DXT1_SRGB
- 128, // DXT23_SRGB
- 128, // DXT45_SRGB
- 128, // BC7U
- 128, // ASTC_2D_4X4_SRGB
- 128, // ASTC_2D_8X8_SRGB
- 128, // ASTC_2D_8X5_SRGB
- 128, // ASTC_2D_5X4_SRGB
- 128, // ASTC_2D_5X5
- 128, // ASTC_2D_5X5_SRGB
- 128, // ASTC_2D_10X8
- 128, // ASTC_2D_10X8_SRGB
- 32, // Z32F
- 16, // Z16
- 32, // Z24S8
- 32, // S8Z24
- 64, // Z32FS8
- }};
-
ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
return bpp_table[static_cast<std::size_t>(format)];
}
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index bbae9285f..5db75de22 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -226,7 +226,7 @@ u32 BytesPerPixel(TextureFormat format) {
return 8;
default:
UNIMPLEMENTED_MSG("Format not implemented");
- break;
+ return 1;
}
}