aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h10
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp42
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp84
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h15
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_state.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp49
-rw-r--r--src/video_core/shader/decode/warp.cpp79
-rw-r--r--src/video_core/shader/node.h12
-rw-r--r--src/video_core/surface.cpp2
15 files changed, 171 insertions, 204 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 78d6886fb..9fafed4a2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -616,6 +616,14 @@ union Instruction {
} shfl;
union {
+ BitField<44, 1, u64> ftz; // not read anywhere in this patch
+ BitField<39, 2, u64> tab5cb8_2; // not read anywhere in this patch
+ BitField<38, 1, u64> ndv; // the FSWZADD decode case in this patch flags a set ndv as unimplemented
+ BitField<47, 1, u64> cc; // not read anywhere in this patch
+ BitField<28, 8, u64> swizzle; // passed by the FSWZADD decode case as the uint mask operand of FSwizzleAdd
+ } fswzadd; // FSWZADD instruction fields; NOTE(review): BitField arguments look like <position, bits, type> - confirm
+
+ union {
BitField<8, 8, Register> gpr;
BitField<20, 24, s64> offset;
} gmem;
@@ -1592,6 +1600,7 @@ public:
DEPBAR,
VOTE,
SHFL,
+ FSWZADD,
BFE_C,
BFE_R,
BFE_IMM,
@@ -1890,6 +1899,7 @@ private:
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
+ INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c65b24c69..b30d5be74 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -62,6 +62,7 @@ Device::Device() {
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
+ has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_variable_aoffi = TestVariableAoffi();
@@ -79,6 +80,7 @@ Device::Device(std::nullptr_t) {
max_vertex_attributes = 16;
max_varyings = 15;
has_warp_intrinsics = true;
+ has_shader_ballot = true;
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
has_variable_aoffi = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bf35bd0b6..6c86fe207 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -34,6 +34,10 @@ public:
return has_warp_intrinsics;
}
+ bool HasShaderBallot() const { // Getter for the has_shader_ballot flag (set from GLAD_GL_ARB_shader_ballot in Device()).
+ return has_shader_ballot;
+ }
+
bool HasVertexViewportLayer() const {
return has_vertex_viewport_layer;
}
@@ -68,6 +72,7 @@ private:
u32 max_vertex_attributes{};
u32 max_varyings{};
bool has_warp_intrinsics{};
+ bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
bool has_variable_aoffi{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e43ba9d6b..05f8e511b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -257,10 +257,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
continue;
}
- const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
-
GLShader::MaxwellUniformData ubo{};
- ubo.SetFromRegs(gpu, stage);
+ ubo.SetFromRegs(gpu);
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
@@ -269,10 +267,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
Shader shader{shader_cache.GetStageProgram(program)};
- const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
- SetupDrawConstBuffers(stage_enum, shader);
- SetupDrawGlobalMemory(stage_enum, shader);
- const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
+ // Stage indices are 0 - 5
+ const auto stage = static_cast<Maxwell::ShaderStage>(index == 0 ? 0 : index - 1);
+ SetupDrawConstBuffers(stage, shader);
+ SetupDrawGlobalMemory(stage, shader);
+ const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
@@ -1055,6 +1054,15 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
+
+ bool flip_y = false;
+ if (regs.viewport_transform[0].scale_y < 0.0) {
+ flip_y = !flip_y;
+ }
+ if (regs.screen_y_control.y_negate != 0) {
+ flip_y = !flip_y;
+ }
+ state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
}
void RasterizerOpenGL::SyncClipEnabled(
@@ -1077,28 +1085,14 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- auto& maxwell3d = system.GPU().Maxwell3D();
-
- const auto& regs = maxwell3d.regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
- state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
-
- const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
- regs.viewport_transform[0].scale_y < 0.0f};
-
- // If the GPU is configured to flip the rasterized triangles, then we need to flip the
- // notion of front and back. Note: We flip the triangles when the value of the register is 0
- // because OpenGL already does it for us.
- if (flip_triangles) {
- if (state.cull.front_face == GL_CCW)
- state.cull.front_face = GL_CW;
- else if (state.cull.front_face == GL_CW)
- state.cull.front_face = GL_CCW;
- }
}
+
+ state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 35e5214a5..04a239a39 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -275,16 +275,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
std::string source = fmt::format(R"(// {}
#version 430 core
#extension GL_ARB_separate_shader_objects : enable
-#extension GL_ARB_shader_viewport_layer_array : enable
-#extension GL_EXT_shader_image_load_formatted : enable
-#extension GL_NV_gpu_shader5 : enable
-#extension GL_NV_shader_thread_group : enable
-#extension GL_NV_shader_thread_shuffle : enable
)",
GetShaderId(unique_identifier, program_type));
if (is_compute) {
source += "#extension GL_ARB_compute_variable_group_size : require\n";
}
+ if (device.HasShaderBallot()) {
+ source += "#extension GL_ARB_shader_ballot : require\n";
+ }
+ if (device.HasVertexViewportLayer()) {
+ source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
+ }
+ if (device.HasImageLoadFormatted()) {
+ source += "#extension GL_EXT_shader_image_load_formatted : require\n";
+ }
+ if (device.HasWarpIntrinsics()) {
+ source += "#extension GL_NV_gpu_shader5 : require\n"
+ "#extension GL_NV_shader_thread_group : require\n"
+ "#extension GL_NV_shader_thread_shuffle : require\n";
+ }
source += '\n';
if (!is_compute) {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0ce59a852..4f2b49170 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1379,6 +1379,26 @@ private:
return GenerateUnary(operation, "float", Type::Float, type);
}
+ Expression FSwizzleAdd(Operation operation) { // Emits GLSL for FSwizzleAdd: (float a, float b, uint mask) -> float.
+ const std::string op_a = VisitOperand(operation, 0).AsFloat();
+ const std::string op_b = VisitOperand(operation, 1).AsFloat();
+ // Fallback: when the device reports no shader ballot support, log an error and emit a plain sum.
+ if (!device.HasShaderBallot()) {
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {fmt::format("{} + {}", op_a, op_b), Type::Float};
+ }
+ // Select this invocation's 2-bit field of the mask: the shift is (gl_SubGroupInvocationARB & 3) * 2.
+ const std::string instr_mask = VisitOperand(operation, 2).AsUint();
+ const std::string mask = code.GenerateTemporary();
+ code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
+ instr_mask);
+ // The field indexes the fswzadd_modifiers_a/b GLSL arrays; each operand is scaled, then the products are added.
+ const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
+ const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
+ return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
+ Type::Float};
+ }
+
Expression ICastFloat(Operation operation) {
return GenerateUnary(operation, "int", Type::Int, Type::Float);
}
@@ -1872,10 +1892,6 @@ private:
Expression EmitVertex(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");
-
- // If a geometry shader is attached, it will always flip (it's the last stage before
- // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
- code.AddLine("gl_Position.xy *= viewport_flip.xy;");
code.AddLine("EmitVertex();");
return {};
}
@@ -1883,14 +1899,12 @@ private:
Expression EndPrimitive(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");
-
code.AddLine("EndPrimitive();");
return {};
}
Expression YNegate(Operation operation) {
- // Config pack's third value is Y_NEGATE's state.
- return {"config_pack[2]", Type::Uint};
+ return {"y_direction", Type::Float};
}
template <u32 element>
@@ -1942,34 +1956,24 @@ private:
return Vote(operation, "allThreadsEqualNV");
}
- template <const std::string_view& func>
- Expression Shuffle(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsFloat();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
- // On a "single-thread" device we are either on the same thread or out of bounds. Both
- // cases return the passed value.
- return {value, Type::Float};
+ Expression ThreadId(Operation operation) {
+ if (!device.HasShaderBallot()) {
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {"0U", Type::Uint};
}
-
- const std::string index = VisitOperand(operation, 1).AsUint();
- const std::string width = VisitOperand(operation, 2).AsUint();
- return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+ return {"gl_SubGroupInvocationARB", Type::Uint};
}
- template <const std::string_view& func>
- Expression InRangeShuffle(Operation operation) {
- const std::string index = VisitOperand(operation, 0).AsUint();
- const std::string width = VisitOperand(operation, 1).AsUint();
- if (!device.HasWarpIntrinsics()) {
- // On a "single-thread" device we are only in bounds when the requested index is 0.
- return {fmt::format("({} == 0U)", index), Type::Bool};
+ Expression ShuffleIndexed(Operation operation) {
+ std::string value = VisitOperand(operation, 0).AsFloat();
+
+ if (!device.HasShaderBallot()) {
+ LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+ return {std::move(value), Type::Float};
}
- const std::string in_range = code.GenerateTemporary();
- code.AddLine("bool {};", in_range);
- code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
- return {in_range, Type::Bool};
+ const std::string index = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
}
struct Func final {
@@ -1981,11 +1985,6 @@ private:
static constexpr std::string_view Or = "Or";
static constexpr std::string_view Xor = "Xor";
static constexpr std::string_view Exchange = "Exchange";
-
- static constexpr std::string_view ShuffleIndexed = "shuffleNV";
- static constexpr std::string_view ShuffleUp = "shuffleUpNV";
- static constexpr std::string_view ShuffleDown = "shuffleDownNV";
- static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
};
static constexpr std::array operation_decompilers = {
@@ -2016,6 +2015,7 @@ private:
&GLSLDecompiler::FTrunc,
&GLSLDecompiler::FCastInteger<Type::Int>,
&GLSLDecompiler::FCastInteger<Type::Uint>,
+ &GLSLDecompiler::FSwizzleAdd,
&GLSLDecompiler::Add<Type::Int>,
&GLSLDecompiler::Mul<Type::Int>,
@@ -2151,15 +2151,8 @@ private:
&GLSLDecompiler::VoteAny,
&GLSLDecompiler::VoteEqual,
- &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
- &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
- &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
- &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
-
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
+ &GLSLDecompiler::ThreadId,
+ &GLSLDecompiler::ShuffleIndexed,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2492,6 +2485,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
bvec2 is_nan2 = isnan(pair2);
return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
}
+
+const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
+const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
)";
}
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 0e22eede9..af17216bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -20,8 +20,7 @@ std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const
std::string out = GetCommonDeclarations();
out += R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+ float y_direction;
};
)";
@@ -35,23 +34,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
void main() {
execute_vertex();
)";
-
if (ir_b) {
out += " execute_vertex_b();";
}
-
- out += R"(
-
- // Set Position Y direction
- gl_Position.y *= utof(config_pack[2]);
- // Check if the flip stage is VertexB
- // Config pack's second value is flip_stage
- if (config_pack[1] == 1) {
- // Viewport can be flipped, which is unsupported by glViewport
- gl_Position.xy *= viewport_flip.xy;
- }
-}
-)";
+ out += "}\n";
return out;
}
@@ -59,8 +45,7 @@ std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+ float y_direction;
};
)";
@@ -87,8 +72,7 @@ layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
- vec4 viewport_flip;
- uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+ float y_direction;
};
)";
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index b05f90f20..75d3fac04 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -40,27 +40,11 @@ void ProgramManager::UpdatePipeline() {
old_state = current_state;
}
-void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
- const auto& state = maxwell.state;
-
- // TODO(bunnei): Support more than one viewport
- viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
- viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
-
- instance_id = state.current_instance;
-
- // Assign in which stage the position has to be flipped
- // (the last stage before the fragment shader).
- constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
- if (maxwell.regs.shader_config[geometry_index].enable) {
- flip_stage = geometry_index;
- } else {
- flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
- }
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
- y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
+ y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 6961e702a..3703e7018 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -18,17 +18,12 @@ namespace OpenGL::GLShader {
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
/// Not following that rule will cause problems on some AMD drivers.
-struct MaxwellUniformData {
- void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
-
- alignas(16) GLvec4 viewport_flip;
- struct alignas(16) {
- GLuint instance_id;
- GLuint flip_stage;
- GLfloat y_direction;
- };
+struct alignas(16) MaxwellUniformData {
+ void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
+
+ GLfloat y_direction;
};
-static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index f25148362..ccbe5912e 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -410,6 +410,12 @@ void OpenGLState::ApplyAlphaTest() {
}
}
+void OpenGLState::ApplyClipControl() { // Applies the clip_control.origin state through glClipControl.
+ if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) { // NOTE(review): presumably true only when the value changed - confirm
+ glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE); // depth mode is always passed as GL_NEGATIVE_ONE_TO_ONE
+ }
+}
+
void OpenGLState::ApplyTextures() {
if (const auto update = UpdateArray(cur_state.textures, textures)) {
glBindTextures(update->first, update->second, textures.data() + update->first);
@@ -453,6 +459,7 @@ void OpenGLState::Apply() {
ApplyImages();
ApplyPolygonOffset();
ApplyAlphaTest();
+ ApplyClipControl();
}
void OpenGLState::EmulateViewportWithScissor() {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index cca25206b..eaff22bda 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -146,6 +146,10 @@ public:
std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
+ struct { // Clip control state consumed by ApplyClipControl().
+ GLenum origin = GL_LOWER_LEFT; // origin argument handed to glClipControl; SyncViewport switches it to GL_UPPER_LEFT for a Y flip
+ } clip_control;
+
OpenGLState();
/// Get the currently active OpenGL state
@@ -182,6 +186,7 @@ public:
void ApplyDepthClamp();
void ApplyPolygonOffset();
void ApplyAlphaTest();
+ void ApplyClipControl();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 42cf068b6..2850d5b59 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -783,6 +783,11 @@ private:
return {};
}
+ Id FSwizzleAdd(Operation operation) { // Stub: FSwizzleAdd has no SPIR-V implementation in this patch.
+ UNIMPLEMENTED();
+ return {}; // default-constructed Id, matching the neighbouring unimplemented handlers
+ }
+
Id HNegate(Operation operation) {
UNIMPLEMENTED();
return {};
@@ -1195,42 +1200,12 @@ private:
return {};
}
- Id ShuffleIndexed(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleUp(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleDown(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleButterfly(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleIndexed(Operation) {
+ Id ThreadId(Operation) {
UNIMPLEMENTED();
return {};
}
- Id InRangeShuffleUp(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleDown(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleButterfly(Operation) {
+ Id ShuffleIndexed(Operation) {
UNIMPLEMENTED();
return {};
}
@@ -1393,6 +1368,7 @@ private:
&SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
&SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
&SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
+ &SPIRVDecompiler::FSwizzleAdd,
&SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
&SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
@@ -1528,15 +1504,8 @@ private:
&SPIRVDecompiler::VoteAny,
&SPIRVDecompiler::VoteEqual,
+ &SPIRVDecompiler::ThreadId,
&SPIRVDecompiler::ShuffleIndexed,
- &SPIRVDecompiler::ShuffleUp,
- &SPIRVDecompiler::ShuffleDown,
- &SPIRVDecompiler::ShuffleButterfly,
-
- &SPIRVDecompiler::InRangeShuffleIndexed,
- &SPIRVDecompiler::InRangeShuffleUp,
- &SPIRVDecompiler::InRangeShuffleDown,
- &SPIRVDecompiler::InRangeShuffleButterfly,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index fa8a250cc..d98d0e1dd 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
+
OperationCode GetOperationCode(VoteOperation vote_op) {
switch (vote_op) {
case VoteOperation::All:
@@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
return OperationCode::VoteAll;
}
}
+
} // Anonymous namespace
u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
@@ -46,50 +48,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::SHFL: {
- Node width = [this, instr] {
- Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
- : GetRegister(instr.gpr39);
-
- // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
- // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
- // different parameters that don't properly map to GLSL's interface, but it should work
- // for cases emitted by Nvidia's compiler.
- if (instr.shfl.operation == ShuffleOperation::Up) {
- return Operation(
- OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
- Immediate(8));
- } else {
- return Operation(OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, Immediate(0x201F),
- Operation(OperationCode::INegate, std::move(mask))),
- Immediate(8));
- }
- }();
+ Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+ : GetRegister(instr.gpr39);
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
+
+ Node thread_id = Operation(OperationCode::ThreadId);
+ Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
+ Node seg_mask = BitfieldExtract(mask, 8, 16);
- const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
+ Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
+ Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
+ Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
+
+ Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
switch (instr.shfl.operation) {
case ShuffleOperation::Idx:
- return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
- case ShuffleOperation::Up:
- return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ return Operation(OperationCode::IBitwiseOr,
+ Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
+ min_thread_id);
case ShuffleOperation::Down:
- return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ return Operation(OperationCode::IAdd, thread_id, index);
+ case ShuffleOperation::Up:
+ return Operation(OperationCode::IAdd, thread_id,
+ Operation(OperationCode::INegate, index));
case ShuffleOperation::Bfly:
- return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ return Operation(OperationCode::IBitwiseXor, thread_id, index);
}
- UNREACHABLE_MSG("Invalid SHFL operation: {}",
- static_cast<u64>(instr.shfl.operation.Value()));
- return {};
+ UNREACHABLE();
+ return Immediate(0U);
}();
- // Setting the predicate before the register is intentional to avoid overwriting.
- Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
- : GetRegister(instr.gpr20);
- SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
+ } else {
+ return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
+ }
+ }();
+
+ SetPredicate(bb, instr.shfl.pred48, in_bounds);
SetRegister(
bb, instr.gpr0,
- Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
+ break;
+ }
+ case OpCode::Id::FSWZADD: {
+ UNIMPLEMENTED_IF(instr.fswzadd.ndv);
+
+ Node op_a = GetRegister(instr.gpr8);
+ Node op_b = GetRegister(instr.gpr20);
+ Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
+ SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
break;
}
default:
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4300d9ff4..54217e6a4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -47,6 +47,7 @@ enum class OperationCode {
FTrunc, /// (MetaArithmetic, float a) -> float
FCastInteger, /// (MetaArithmetic, int a) -> float
FCastUInteger, /// (MetaArithmetic, uint a) -> float
+ FSwizzleAdd, /// (float a, float b, uint mask) -> float
IAdd, /// (MetaArithmetic, int a, int b) -> int
IMul, /// (MetaArithmetic, int a, int b) -> int
@@ -181,15 +182,8 @@ enum class OperationCode {
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
- ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
- ShuffleUp, /// (uint value, uint index, uint width) -> uint
- ShuffleDown, /// (uint value, uint index, uint width) -> uint
- ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
-
- InRangeShuffleIndexed, /// (uint index, uint width) -> bool
- InRangeShuffleUp, /// (uint index, uint width) -> bool
- InRangeShuffleDown, /// (uint index, uint width) -> bool
- InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+ ThreadId, /// () -> uint
+ ShuffleIndexed, /// (uint value, uint index) -> uint
Amount,
};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 621136b6e..4b6846113 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -249,6 +249,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::RGBA16U;
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGBA16F;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::RGBA16UI;
default:
break;
}