about summary refs log tree commit diff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 4
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 2
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 31
-rw-r--r-- src/video_core/gpu.cpp | 1
-rw-r--r-- src/video_core/gpu.h | 1
-rw-r--r-- src/video_core/morton.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_framebuffer_cache.cpp | 27
-rw-r--r-- src/video_core/renderer_opengl/gl_framebuffer_cache.h | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 88
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 38
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 87
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 1
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 1
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 50
-rw-r--r-- src/video_core/shader/decode/arithmetic_integer.cpp | 29
-rw-r--r-- src/video_core/shader/decode/warp.cpp | 47
-rw-r--r-- src/video_core/shader/node.h | 10
-rw-r--r-- src/video_core/surface.cpp | 3
-rw-r--r-- src/video_core/surface.h | 50
21 files changed, 313 insertions, 167 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 98a8b5337..7ff44f06d 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -29,8 +29,8 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
}
void Fermi2D::HandleSurfaceCopy() {
- LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
- static_cast<u32>(regs.operation));
+ LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
+ static_cast<u32>(regs.operation));
// TODO(Subv): Only raw copies are implemented.
ASSERT(regs.operation == Operation::SrcCopy);
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index e9c15beff..b318aedb8 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -639,7 +639,7 @@ void Maxwell3D::ProcessSyncPoint() {
}
void Maxwell3D::DrawArrays() {
- LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 052e6d24e..28272ef6f 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -566,6 +566,13 @@ enum class ImageAtomicOperation : u64 {
Exch = 8,
};
+enum class ShuffleOperation : u64 { // Variant of the SHFL instruction; each entry names the GLSL builtin it is lowered to
+ Idx = 0, // shuffleNV
+ Up = 1, // shuffleUpNV
+ Down = 2, // shuffleDownNV
+ Bfly = 3, // shuffleXorNV
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -600,6 +607,15 @@ union Instruction {
} vote;
union {
+ BitField<30, 2, ShuffleOperation> operation; // shuffle variant requested by the instruction
+ BitField<48, 3, u64> pred48; // predicate that the decoder writes the in-range result to
+ BitField<28, 1, u64> is_index_imm; // set: the index is index_imm; clear: the decoder reads it from gpr20
+ BitField<29, 1, u64> is_mask_imm; // set: the mask is mask_imm; clear: the decoder reads it from gpr39
+ BitField<20, 5, u64> index_imm; // immediate form of the index operand
+ BitField<34, 13, u64> mask_imm; // immediate form of the mask operand (the decoder derives the shuffle width from it)
+ } shfl;
+
+ union {
BitField<8, 8, Register> gpr;
BitField<20, 24, s64> offset;
} gmem;
@@ -934,6 +950,11 @@ union Instruction {
} isetp;
union {
+ BitField<48, 1, u64> is_signed; // forwarded as a bool to the integer predicate comparison built for ICMP
+ BitField<49, 3, PredCondition> cond; // condition used to compare the ICMP test operand against zero
+ } icmp;
+
+ union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
BitField<12, 3, u64> pred12;
@@ -1542,6 +1563,7 @@ public:
BRK,
DEPBAR,
VOTE,
+ SHFL,
BFE_C,
BFE_R,
BFE_IMM,
@@ -1628,6 +1650,10 @@ public:
SEL_C,
SEL_R,
SEL_IMM,
+ ICMP_RC,
+ ICMP_R,
+ ICMP_CR,
+ ICMP_IMM,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -1833,6 +1859,7 @@ private:
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
+ INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
@@ -1892,6 +1919,10 @@ private:
INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
+ INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"),
+ INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
+ INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
+ INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 2c47541cb..76cfe8107 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -122,6 +122,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
case RenderTargetFormat::RGBA16_UINT:
case RenderTargetFormat::RGBA16_UNORM:
case RenderTargetFormat::RGBA16_FLOAT:
+ case RenderTargetFormat::RGBX16_FLOAT:
case RenderTargetFormat::RG32_FLOAT:
case RenderTargetFormat::RG32_UINT:
return 8;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 78bc0601a..29fa8e95b 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 {
RGBA16_FLOAT = 0xCA,
RG32_FLOAT = 0xCB,
RG32_UINT = 0xCD,
+ RGBX16_FLOAT = 0xCE,
BGRA8_UNORM = 0xCF,
BGRA8_SRGB = 0xD0,
RGB10_A2_UNORM = 0xD1,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 084f85e67..ab71870ab 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -83,6 +83,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::RG8U>,
MortonCopy<true, PixelFormat::RG8S>,
MortonCopy<true, PixelFormat::RG32UI>,
+ MortonCopy<true, PixelFormat::RGBX16F>,
MortonCopy<true, PixelFormat::R32UI>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
@@ -151,6 +152,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::RG8U>,
MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::RG32UI>,
+ MortonCopy<false, PixelFormat::RGBX16F>,
MortonCopy<false, PixelFormat::R32UI>,
nullptr,
nullptr,
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
index 7c926bd48..a5d69d78d 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -35,21 +35,16 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
local_state.draw.draw_framebuffer = framebuffer.handle;
local_state.ApplyFramebufferState();
- if (key.is_single_buffer) {
- if (key.color_attachments[0] != GL_NONE && key.colors[0]) {
- key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER);
- glDrawBuffer(key.color_attachments[0]);
- } else {
- glDrawBuffer(GL_NONE);
- }
- } else {
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- if (key.colors[index]) {
- key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
- GL_DRAW_FRAMEBUFFER);
- }
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ if (key.colors[index]) {
+ key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+ GL_DRAW_FRAMEBUFFER);
}
+ }
+ if (key.colors_count) {
glDrawBuffers(key.colors_count, key.color_attachments.data());
+ } else {
+ glDrawBuffer(GL_NONE);
}
if (key.zeta) {
@@ -67,9 +62,9 @@ std::size_t FramebufferCacheKey::Hash() const {
}
bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
- return std::tie(is_single_buffer, stencil_enable, colors_count, color_attachments, colors,
- zeta) == std::tie(rhs.is_single_buffer, rhs.stencil_enable, rhs.colors_count,
- rhs.color_attachments, rhs.colors, rhs.zeta);
+ return std::tie(stencil_enable, colors_count, color_attachments, colors, zeta) ==
+ std::tie(rhs.stencil_enable, rhs.colors_count, rhs.color_attachments, rhs.colors,
+ rhs.zeta);
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
index a3a996353..424344c48 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -19,7 +19,6 @@
namespace OpenGL {
struct alignas(sizeof(u64)) FramebufferCacheKey {
- bool is_single_buffer = false;
bool stencil_enable = false;
u16 colors_count = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 246b892c5..6a17bed72 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -382,99 +382,51 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
shader_cache.LoadDiskCache(stop_loading, callback);
}
-std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
- OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
- std::optional<std::size_t> single_color_target) {
+void RasterizerOpenGL::ConfigureFramebuffers() {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
- single_color_target};
- if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
- // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
- // single color targets). This is done because the guest registers may not change but the
- // host framebuffer may contain different attachments
- return current_depth_stencil_usage;
+ if (!gpu.dirty.render_settings) {
+ return;
}
gpu.dirty.render_settings = false;
- current_framebuffer_config_state = fb_config_state;
texture_cache.GuardRenderTargets(true);
- View depth_surface{};
- if (using_depth_fb) {
- depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
- } else {
- texture_cache.SetEmptyDepthBuffer();
- }
+ View depth_surface = texture_cache.GetDepthBufferSurface(true);
+ const auto& regs = gpu.regs;
+ state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
- current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
-
FramebufferCacheKey fbkey;
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ View color_surface{texture_cache.GetColorBufferSurface(index, true)};
- if (using_color_fb) {
- if (single_color_target) {
- // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
- View color_surface{
- texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)};
-
- if (color_surface) {
- // Assume that a surface will be written to if it is used as a framebuffer, even if
- // the shader doesn't actually write to it.
- texture_cache.MarkColorBufferInUse(*single_color_target);
- }
-
- fbkey.is_single_buffer = true;
- fbkey.color_attachments[0] =
- GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);
- fbkey.colors[0] = color_surface;
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- if (index != *single_color_target) {
- texture_cache.SetEmptyColorBuffer(index);
- }
- }
- } else {
- // Multiple color attachments are enabled
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)};
-
- if (color_surface) {
- // Assume that a surface will be written to if it is used as a framebuffer, even
- // if the shader doesn't actually write to it.
- texture_cache.MarkColorBufferInUse(index);
- }
-
- fbkey.color_attachments[index] =
- GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
- fbkey.colors[index] = color_surface;
- }
- fbkey.is_single_buffer = false;
- fbkey.colors_count = regs.rt_control.count;
+ if (color_surface) {
+ // Assume that a surface will be written to if it is used as a framebuffer, even
+ // if the shader doesn't actually write to it.
+ texture_cache.MarkColorBufferInUse(index);
}
- } else {
- // No color attachments are enabled - leave them as zero
- fbkey.is_single_buffer = true;
+
+ fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+ fbkey.colors[index] = std::move(color_surface);
}
+ fbkey.colors_count = regs.rt_control.count;
if (depth_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
texture_cache.MarkDepthBufferInUse();
- fbkey.zeta = depth_surface;
fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
+ fbkey.zeta = std::move(depth_surface);
}
texture_cache.GuardRenderTargets(false);
- current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
- SyncViewport(current_state);
-
- return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
+ state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
+ SyncViewport(state);
}
void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
@@ -684,7 +636,7 @@ void RasterizerOpenGL::DrawPrelude() {
SetupShaders(primitive_mode);
texture_cache.GuardSamplers(false);
- ConfigureFramebuffers(state);
+ ConfigureFramebuffers();
// Signal the buffer cache that we are not going to upload more things.
const bool invalidate = buffer_cache.Unmap();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 682f0becc..9c10ebda3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -77,39 +77,8 @@ public:
const VideoCore::DiskResourceLoadCallback& callback) override;
private:
- struct FramebufferConfigState {
- bool using_color_fb{};
- bool using_depth_fb{};
- bool preserve_contents{};
- std::optional<std::size_t> single_color_target;
-
- bool operator==(const FramebufferConfigState& rhs) const {
- return std::tie(using_color_fb, using_depth_fb, preserve_contents,
- single_color_target) == std::tie(rhs.using_color_fb, rhs.using_depth_fb,
- rhs.preserve_contents,
- rhs.single_color_target);
- }
- bool operator!=(const FramebufferConfigState& rhs) const {
- return !operator==(rhs);
- }
- };
-
- /**
- * Configures the color and depth framebuffer states.
- *
- * @param current_state The current OpenGL state.
- * @param using_color_fb If true, configure color framebuffers.
- * @param using_depth_fb If true, configure the depth/stencil framebuffer.
- * @param preserve_contents If true, tries to preserve data from a previously used
- * framebuffer.
- * @param single_color_target Specifies if a single color buffer target should be used.
- *
- * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture
- * (requires using_depth_fb to be true)
- */
- std::pair<bool, bool> ConfigureFramebuffers(
- OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
- bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
+ /// Configures the color and depth framebuffer states.
+ void ConfigureFramebuffers();
void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
bool using_depth_fb, bool using_stencil_fb);
@@ -231,9 +200,6 @@ private:
OGLVertexArray>
vertex_array_cache;
- FramebufferConfigState current_framebuffer_config_state;
- std::pair<bool, bool> current_depth_stencil_usage{};
-
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 909ccb82c..0dbc4c02f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
std::string source = "#version 430 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"#extension GL_NV_gpu_shader5 : enable\n"
- "#extension GL_NV_shader_thread_group : enable\n";
+ "#extension GL_NV_shader_thread_group : enable\n"
+ "#extension GL_NV_shader_thread_shuffle : enable\n";
if (entries.shader_viewport_layer_array) {
source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index f7e86ab26..74cb59bc1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1029,10 +1029,10 @@ private:
return {std::move(temporary), value.GetType()};
}
- Expression GetOutputAttribute(const AbufNode* abuf) {
+ std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
switch (const auto attribute = abuf->GetIndex()) {
case Attribute::Index::Position:
- return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float};
+ return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}};
case Attribute::Index::LayerViewportPointSize:
switch (abuf->GetElement()) {
case 0:
@@ -1042,25 +1042,25 @@ private:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return {"gl_Layer", Type::Int};
+ return {{"gl_Layer", Type::Int}};
case 2:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return {"gl_ViewportIndex", Type::Int};
+ return {{"gl_ViewportIndex", Type::Int}};
case 3:
UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
- return {"gl_PointSize", Type::Float};
+ return {{"gl_PointSize", Type::Float}};
}
return {};
case Attribute::Index::ClipDistances0123:
- return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float};
+ return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}};
case Attribute::Index::ClipDistances4567:
- return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float};
+ return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
default:
if (IsGenericAttribute(attribute)) {
- return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()),
- Type::Float};
+ return {
+ {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
@@ -1300,7 +1300,11 @@ private:
target = {GetRegister(gpr->GetIndex()), Type::Float};
} else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- target = GetOutputAttribute(abuf);
+ auto output = GetOutputAttribute(abuf);
+ if (!output) {
+ return {};
+ }
+ target = std::move(*output);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
@@ -1961,8 +1965,7 @@ private:
Expression BallotThread(Operation operation) {
const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia warp intrinsics are not available and its required by a shader");
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
// Stub on non-Nvidia devices by simulating all threads voting the same as the active
// one.
return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
@@ -1973,8 +1976,7 @@ private:
Expression Vote(Operation operation, const char* func) {
const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia vote intrinsics are not available and its required by a shader");
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
// Stub with a warp size of one.
return {value, Type::Bool};
}
@@ -1991,15 +1993,54 @@ private:
Expression VoteEqual(Operation operation) {
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia vote intrinsics are not available and its required by a shader");
- // We must return true here since a stub for a theoretical warp size of 1 will always
- // return an equal result for all its votes.
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
+ // We must return true here since this is a stub for a theoretical warp size of 1.
+ // This will always return an equal result across all votes.
return {"true", Type::Bool};
}
return Vote(operation, "allThreadsEqualNV");
}
+ template <const std::string_view& func> // func: name of the GLSL shuffle builtin to emit
+ Expression Shuffle(Operation operation) { // Builds the float expression "func(value, index, width)" from operands 0, 1 and 2
+ const std::string value = VisitOperand(operation, 0).AsFloat(); // NOTE(review): node.h documents the shuffled value as uint, here it is read and returned as float - confirm this is intended
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
+ // On a "single-thread" device we are either on the same thread or out of bounds. Both
+ // cases return the passed value.
+ return {value, Type::Float};
+ }
+
+ const std::string index = VisitOperand(operation, 1).AsUint(); // operand 1: index argument of the builtin
+ const std::string width = VisitOperand(operation, 2).AsUint(); // operand 2: width argument of the builtin
+ return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+ }
+
+ template <const std::string_view& func> // func: name of the GLSL shuffle builtin to emit
+ Expression InRangeShuffle(Operation operation) { // Yields a bool expression for the "in range" result of a shuffle; operands are (index, width)
+ const std::string index = VisitOperand(operation, 0).AsUint();
+ const std::string width = VisitOperand(operation, 1).AsUint();
+ if (!device.HasWarpIntrinsics()) {
+ // On a "single-thread" device we are only in bounds when the requested index is 0.
+ return {fmt::format("({} == 0U)", index), Type::Bool};
+ }
+
+ const std::string in_range = code.GenerateTemporary(); // fresh GLSL variable name for the flag
+ code.AddLine("bool {};", in_range); // declare the flag in the generated shader
+ code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); // call with a dummy 0U value and discard the result; presumably the builtin writes the flag through its fourth (out) argument - confirm against the extension spec
+ return {in_range, Type::Bool};
+ }
+
+ struct Func final { // Holds the GLSL builtin names that are passed as template arguments to Shuffle and InRangeShuffle
+ Func() = delete; // constructor and destructor are deleted: the type only scopes the constants below
+ ~Func() = delete;
+
+ static constexpr std::string_view ShuffleIndexed = "shuffleNV"; // static members, so they can bind to a const std::string_view& template parameter
+ static constexpr std::string_view ShuffleUp = "shuffleUpNV";
+ static constexpr std::string_view ShuffleDown = "shuffleDownNV";
+ static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
+ };
+
static constexpr std::array operation_decompilers = {
&GLSLDecompiler::Assign,
@@ -2162,6 +2203,16 @@ private:
&GLSLDecompiler::VoteAll,
&GLSLDecompiler::VoteAny,
&GLSLDecompiler::VoteEqual,
+
+ &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
+
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 4f135fe03..173b76c4e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -97,6 +97,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
+ {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBX16F - NOTE(review): GL_RGBA16 is a sized internal format, while neighbouring entries put an unsized transfer format (GL_RG, GL_RGBA, GL_RG_INTEGER) in this slot; GL_RGBA looks intended - confirm
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index ea77dd211..9ed738171 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -145,7 +145,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
case Tegra::Texture::TextureMipmapFilter::None:
return GL_LINEAR;
case Tegra::Texture::TextureMipmapFilter::Nearest:
- return GL_NEAREST_MIPMAP_LINEAR;
+ return GL_LINEAR_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
return GL_LINEAR_MIPMAP_LINEAR;
}
@@ -157,7 +157,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
case Tegra::Texture::TextureMipmapFilter::Nearest:
return GL_NEAREST_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
- return GL_LINEAR_MIPMAP_NEAREST;
+ return GL_NEAREST_MIPMAP_LINEAR;
}
}
}
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 0bbbf6851..3c5acda3e 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -143,6 +143,7 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S
{vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI
+ {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBX16F
{vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8
{vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b9153934e..f7fbbb6e4 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1127,6 +1127,46 @@ private:
return {};
}
+ Id ShuffleIndexed(Operation) { // SPIR-V handler slot for the indexed shuffle operation; placeholder only
+ UNIMPLEMENTED(); // nothing is emitted for this operation yet
+ return {}; // default-constructed Id as the result
+ }
+
+ Id ShuffleUp(Operation) { // SPIR-V handler slot for the shuffle-up operation; placeholder only
+ UNIMPLEMENTED(); // nothing is emitted for this operation yet
+ return {}; // default-constructed Id as the result
+ }
+
+ Id ShuffleDown(Operation) { // SPIR-V handler slot for the shuffle-down operation; placeholder only
+ UNIMPLEMENTED(); // nothing is emitted for this operation yet
+ return {}; // default-constructed Id as the result
+ }
+
+ Id ShuffleButterfly(Operation) { // SPIR-V handler slot for the butterfly shuffle operation; placeholder only
+ UNIMPLEMENTED(); // nothing is emitted for this operation yet
+ return {}; // default-constructed Id as the result
+ }
+
+ Id InRangeShuffleIndexed(Operation) { // SPIR-V handler slot for the in-range query of the indexed shuffle; placeholder only
+ UNIMPLEMENTED(); // nothing is emitted for this operation yet
+ return {}; // default-constructed Id as the result
+ }
+
+ Id InRangeShuffleUp(Operation) { // SPIR-V handler slot for the in-range query of the shuffle-up; placeholder only
+ UNIMPLEMENTED(); // nothing is emitted for this operation yet
+ return {}; // default-constructed Id as the result
+ }
+
+ Id InRangeShuffleDown(Operation) { // SPIR-V handler slot for the in-range query of the shuffle-down; placeholder only
+ UNIMPLEMENTED(); // nothing is emitted for this operation yet
+ return {}; // default-constructed Id as the result
+ }
+
+ Id InRangeShuffleButterfly(Operation) { // SPIR-V handler slot for the in-range query of the butterfly shuffle; placeholder only
+ UNIMPLEMENTED(); // nothing is emitted for this operation yet
+ return {}; // default-constructed Id as the result
+ }
+
Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
const std::string& name) {
const Id id = OpVariable(type, storage);
@@ -1431,6 +1471,16 @@ private:
&SPIRVDecompiler::VoteAll,
&SPIRVDecompiler::VoteAny,
&SPIRVDecompiler::VoteEqual,
+
+ &SPIRVDecompiler::ShuffleIndexed,
+ &SPIRVDecompiler::ShuffleUp,
+ &SPIRVDecompiler::ShuffleDown,
+ &SPIRVDecompiler::ShuffleButterfly,
+
+ &SPIRVDecompiler::InRangeShuffleIndexed,
+ &SPIRVDecompiler::InRangeShuffleUp,
+ &SPIRVDecompiler::InRangeShuffleDown,
+ &SPIRVDecompiler::InRangeShuffleButterfly,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index c8c1a7f40..b73f6536e 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -138,6 +138,35 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
+ case OpCode::Id::ICMP_CR:
+ case OpCode::Id::ICMP_R:
+ case OpCode::Id::ICMP_RC:
+ case OpCode::Id::ICMP_IMM: {
+ const Node zero = Immediate(0);
+
+ const auto [op_b, test] = [&]() -> std::pair<Node, Node> {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::ICMP_CR:
+ return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+ GetRegister(instr.gpr39)};
+ case OpCode::Id::ICMP_R:
+ return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+ case OpCode::Id::ICMP_RC:
+ return {GetRegister(instr.gpr39),
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+ case OpCode::Id::ICMP_IMM:
+ return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
+ default:
+ UNREACHABLE();
+ return {zero, zero};
+ }
+ }();
+ const Node op_a = GetRegister(instr.gpr8);
+ const Node comparison =
+ GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
+ SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b));
+ break;
+ }
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 04ca74f46..a8e481b3c 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -13,6 +13,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
+using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
@@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
SetPredicate(bb, instr.vote.dest_pred, vote);
break;
}
+ case OpCode::Id::SHFL: {
+ Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+ : GetRegister(instr.gpr39);
+ Node width = [&] {
+ // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
+ // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
+ // different parameters that don't properly map to GLSL's interface, but it should work
+ // for cases emitted by Nvidia's compiler.
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(
+ OperationCode::ILogicalShiftRight,
+ Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
+ Immediate(8));
+ } else {
+ return Operation(OperationCode::ILogicalShiftRight,
+ Operation(OperationCode::IAdd, Immediate(0x201F),
+ Operation(OperationCode::INegate, std::move(mask))),
+ Immediate(8));
+ }
+ }();
+
+ const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ switch (instr.shfl.operation) {
+ case ShuffleOperation::Idx:
+ return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
+ case ShuffleOperation::Up:
+ return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ case ShuffleOperation::Down:
+ return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ case ShuffleOperation::Bfly:
+ return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ }
+ UNREACHABLE_MSG("Invalid SHFL operation: {}",
+ static_cast<u64>(instr.shfl.operation.Value()));
+ return {};
+ }();
+
+ // Setting the predicate before the register is intentional to avoid overwriting.
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
+ SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ SetRegister(
+ bb, instr.gpr0,
+ Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ break;
+ }
default:
UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
break;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 425111cc4..abf2cb1ab 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -181,6 +181,16 @@ enum class OperationCode {
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
+ ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
+ ShuffleUp, /// (uint value, uint index, uint width) -> uint
+ ShuffleDown, /// (uint value, uint index, uint width) -> uint
+ ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
+
+ InRangeShuffleIndexed, /// (uint index, uint width) -> bool
+ InRangeShuffleUp, /// (uint index, uint width) -> bool
+ InRangeShuffleDown, /// (uint index, uint width) -> bool
+ InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+
Amount,
};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 53d0142cb..250afc6d6 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -159,6 +159,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::R32UI;
case Tegra::RenderTargetFormat::RG32_UINT:
return PixelFormat::RG32UI;
+ case Tegra::RenderTargetFormat::RGBX16_FLOAT:
+ return PixelFormat::RGBX16F;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -415,6 +417,7 @@ ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
case Tegra::RenderTargetFormat::RG8_SNORM:
return ComponentType::SNorm;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+ case Tegra::RenderTargetFormat::RGBX16_FLOAT:
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
case Tegra::RenderTargetFormat::RGBA32_FLOAT:
case Tegra::RenderTargetFormat::RG32_FLOAT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 19268b7cd..1e1c432a5 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -57,36 +57,37 @@ enum class PixelFormat {
RG8U = 39,
RG8S = 40,
RG32UI = 41,
- R32UI = 42,
- ASTC_2D_8X8 = 43,
- ASTC_2D_8X5 = 44,
- ASTC_2D_5X4 = 45,
- BGRA8_SRGB = 46,
- DXT1_SRGB = 47,
- DXT23_SRGB = 48,
- DXT45_SRGB = 49,
- BC7U_SRGB = 50,
- ASTC_2D_4X4_SRGB = 51,
- ASTC_2D_8X8_SRGB = 52,
- ASTC_2D_8X5_SRGB = 53,
- ASTC_2D_5X4_SRGB = 54,
- ASTC_2D_5X5 = 55,
- ASTC_2D_5X5_SRGB = 56,
- ASTC_2D_10X8 = 57,
- ASTC_2D_10X8_SRGB = 58,
+ RGBX16F = 42,
+ R32UI = 43,
+ ASTC_2D_8X8 = 44,
+ ASTC_2D_8X5 = 45,
+ ASTC_2D_5X4 = 46,
+ BGRA8_SRGB = 47,
+ DXT1_SRGB = 48,
+ DXT23_SRGB = 49,
+ DXT45_SRGB = 50,
+ BC7U_SRGB = 51,
+ ASTC_2D_4X4_SRGB = 52,
+ ASTC_2D_8X8_SRGB = 53,
+ ASTC_2D_8X5_SRGB = 54,
+ ASTC_2D_5X4_SRGB = 55,
+ ASTC_2D_5X5 = 56,
+ ASTC_2D_5X5_SRGB = 57,
+ ASTC_2D_10X8 = 58,
+ ASTC_2D_10X8_SRGB = 59,
MaxColorFormat,
// Depth formats
- Z32F = 59,
- Z16 = 60,
+ Z32F = 60,
+ Z16 = 61,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 61,
- S8Z24 = 62,
- Z32FS8 = 63,
+ Z24S8 = 62,
+ S8Z24 = 63,
+ Z32FS8 = 64,
MaxDepthStencilFormat,
@@ -166,6 +167,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // RG8U
0, // RG8S
0, // RG32UI
+ 0, // RGBX16F
0, // R32UI
2, // ASTC_2D_8X8
2, // ASTC_2D_8X5
@@ -249,6 +251,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // RG8U
1, // RG8S
1, // RG32UI
+ 1, // RGBX16F
1, // R32UI
8, // ASTC_2D_8X8
8, // ASTC_2D_8X5
@@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // RG8U
1, // RG8S
1, // RG32UI
+ 1, // RGBX16F
1, // R32UI
8, // ASTC_2D_8X8
5, // ASTC_2D_8X5
@@ -399,6 +403,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
16, // RG8U
16, // RG8S
64, // RG32UI
+ 64, // RGBX16F
32, // R32UI
128, // ASTC_2D_8X8
128, // ASTC_2D_8X5
@@ -489,6 +494,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::None, // RG8U
SurfaceCompression::None, // RG8S
SurfaceCompression::None, // RG32UI
+ SurfaceCompression::None, // RGBX16F
SurfaceCompression::None, // R32UI
SurfaceCompression::Converted, // ASTC_2D_8X8
SurfaceCompression::Converted, // ASTC_2D_8X5