diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/decode/warp.cpp | 79 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 12 | ||||
| -rw-r--r-- | src/video_core/surface.cpp | 2 |
15 files changed, 171 insertions, 204 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 78d6886fb..9fafed4a2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -616,6 +616,14 @@ union Instruction { } shfl; union { + BitField<44, 1, u64> ftz; + BitField<39, 2, u64> tab5cb8_2; + BitField<38, 1, u64> ndv; + BitField<47, 1, u64> cc; + BitField<28, 8, u64> swizzle; + } fswzadd; + + union { BitField<8, 8, Register> gpr; BitField<20, 24, s64> offset; } gmem; @@ -1592,6 +1600,7 @@ public: DEPBAR, VOTE, SHFL, + FSWZADD, BFE_C, BFE_R, BFE_IMM, @@ -1890,6 +1899,7 @@ private: INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), + INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c65b24c69..b30d5be74 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -62,6 +62,7 @@ Device::Device() { max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; + has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); has_variable_aoffi = TestVariableAoffi(); @@ -79,6 +80,7 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_warp_intrinsics = true; + has_shader_ballot = true; has_vertex_viewport_layer = true; has_image_load_formatted = true; has_variable_aoffi = true; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index bf35bd0b6..6c86fe207 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -34,6 +34,10 @@ public: return has_warp_intrinsics; } + bool HasShaderBallot() const { + return has_shader_ballot; + } + bool HasVertexViewportLayer() const { return has_vertex_viewport_layer; } @@ -68,6 +72,7 @@ private: u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_warp_intrinsics{}; + bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; bool has_variable_aoffi{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e43ba9d6b..05f8e511b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -257,10 +257,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } - const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 - GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu, stage); + ubo.SetFromRegs(gpu); const auto [buffer, offset] = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); @@ -269,10 +267,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { Shader shader{shader_cache.GetStageProgram(program)}; - const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); - SetupDrawConstBuffers(stage_enum, shader); - SetupDrawGlobalMemory(stage_enum, shader); - const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)}; + // Stage indices are 0 - 5 + const auto stage = static_cast<Maxwell::ShaderStage>(index == 0 ? 0 : index - 1); + SetupDrawConstBuffers(stage, shader); + SetupDrawGlobalMemory(stage, shader); + const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)}; const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); @@ -1055,6 +1054,15 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { } state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; + + bool flip_y = false; + if (regs.viewport_transform[0].scale_y < 0.0) { + flip_y = !flip_y; + } + if (regs.screen_y_control.y_negate != 0) { + flip_y = !flip_y; + } + state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; } void RasterizerOpenGL::SyncClipEnabled( @@ -1077,28 +1085,14 @@ void RasterizerOpenGL::SyncClipCoef() { } void RasterizerOpenGL::SyncCullMode() { - auto& maxwell3d = system.GPU().Maxwell3D(); - - const auto& regs = maxwell3d.regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.cull.enabled = regs.cull.enabled != 0; if (state.cull.enabled) { - state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); - - const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 || - regs.viewport_transform[0].scale_y < 0.0f}; - - // If the GPU is configured to flip the rasterized triangles, then we need to flip the - // notion of front and back. Note: We flip the triangles when the value of the register is 0 - // because OpenGL already does it for us. - if (flip_triangles) { - if (state.cull.front_face == GL_CCW) - state.cull.front_face = GL_CW; - else if (state.cull.front_face == GL_CW) - state.cull.front_face = GL_CCW; - } } + + state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); } void RasterizerOpenGL::SyncPrimitiveRestart() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 35e5214a5..04a239a39 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -275,16 +275,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy std::string source = fmt::format(R"(// {} #version 430 core #extension GL_ARB_separate_shader_objects : enable -#extension GL_ARB_shader_viewport_layer_array : enable -#extension GL_EXT_shader_image_load_formatted : enable -#extension GL_NV_gpu_shader5 : enable -#extension GL_NV_shader_thread_group : enable -#extension GL_NV_shader_thread_shuffle : enable )", GetShaderId(unique_identifier, program_type)); if (is_compute) { source += "#extension GL_ARB_compute_variable_group_size : require\n"; } + if (device.HasShaderBallot()) { + source += "#extension GL_ARB_shader_ballot : require\n"; + } + if (device.HasVertexViewportLayer()) { + source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; + } + if (device.HasImageLoadFormatted()) { + source += "#extension GL_EXT_shader_image_load_formatted : require\n"; + } + if (device.HasWarpIntrinsics()) { + source += "#extension GL_NV_gpu_shader5 : require\n" + "#extension GL_NV_shader_thread_group : require\n" + "#extension GL_NV_shader_thread_shuffle : require\n"; + } source += '\n'; if (!is_compute) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0ce59a852..4f2b49170 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1379,6 +1379,26 @@ private: return GenerateUnary(operation, "float", Type::Float, type); } + Expression FSwizzleAdd(Operation operation) { + const std::string op_a = VisitOperand(operation, 0).AsFloat(); + const std::string op_b = VisitOperand(operation, 1).AsFloat(); + + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {fmt::format("{} + {}", op_a, op_b), Type::Float}; + } + + const std::string instr_mask = VisitOperand(operation, 2).AsUint(); + const std::string mask = code.GenerateTemporary(); + code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, + instr_mask); + + const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); + const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); + return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), + Type::Float}; + } + Expression ICastFloat(Operation operation) { return GenerateUnary(operation, "int", Type::Int, Type::Float); } @@ -1872,10 +1892,6 @@ private: Expression EmitVertex(Operation operation) { ASSERT_MSG(stage == ProgramType::Geometry, "EmitVertex is expected to be used in a geometry shader."); - - // If a geometry shader is attached, it will always flip (it's the last stage before - // fragment). For more info about flipping, refer to gl_shader_gen.cpp. - code.AddLine("gl_Position.xy *= viewport_flip.xy;"); code.AddLine("EmitVertex();"); return {}; } @@ -1883,14 +1899,12 @@ private: Expression EndPrimitive(Operation operation) { ASSERT_MSG(stage == ProgramType::Geometry, "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); return {}; } Expression YNegate(Operation operation) { - // Config pack's third value is Y_NEGATE's state. - return {"config_pack[2]", Type::Uint}; + return {"y_direction", Type::Float}; } template <u32 element> @@ -1942,34 +1956,24 @@ private: return Vote(operation, "allThreadsEqualNV"); } - template <const std::string_view& func> - Expression Shuffle(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsFloat(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); - // On a "single-thread" device we are either on the same thread or out of bounds. Both - // cases return the passed value. - return {value, Type::Float}; + Expression ThreadId(Operation operation) { + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {"0U", Type::Uint}; } - - const std::string index = VisitOperand(operation, 1).AsUint(); - const std::string width = VisitOperand(operation, 2).AsUint(); - return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; + return {"gl_SubGroupInvocationARB", Type::Uint}; } - template <const std::string_view& func> - Expression InRangeShuffle(Operation operation) { - const std::string index = VisitOperand(operation, 0).AsUint(); - const std::string width = VisitOperand(operation, 1).AsUint(); - if (!device.HasWarpIntrinsics()) { - // On a "single-thread" device we are only in bounds when the requested index is 0. - return {fmt::format("({} == 0U)", index), Type::Bool}; + Expression ShuffleIndexed(Operation operation) { + std::string value = VisitOperand(operation, 0).AsFloat(); + + if (!device.HasShaderBallot()) { + LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); + return {std::move(value), Type::Float}; } - const std::string in_range = code.GenerateTemporary(); - code.AddLine("bool {};", in_range); - code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); - return {in_range, Type::Bool}; + const std::string index = VisitOperand(operation, 1).AsUint(); + return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; } struct Func final { @@ -1981,11 +1985,6 @@ private: static constexpr std::string_view Or = "Or"; static constexpr std::string_view Xor = "Xor"; static constexpr std::string_view Exchange = "Exchange"; - - static constexpr std::string_view ShuffleIndexed = "shuffleNV"; - static constexpr std::string_view ShuffleUp = "shuffleUpNV"; - static constexpr std::string_view ShuffleDown = "shuffleDownNV"; - static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; }; static constexpr std::array operation_decompilers = { @@ -2016,6 +2015,7 @@ private: &GLSLDecompiler::FTrunc, &GLSLDecompiler::FCastInteger<Type::Int>, &GLSLDecompiler::FCastInteger<Type::Uint>, + &GLSLDecompiler::FSwizzleAdd, &GLSLDecompiler::Add<Type::Int>, &GLSLDecompiler::Mul<Type::Int>, @@ -2151,15 +2151,8 @@ private: &GLSLDecompiler::VoteAny, &GLSLDecompiler::VoteEqual, - &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, - &GLSLDecompiler::Shuffle<Func::ShuffleUp>, - &GLSLDecompiler::Shuffle<Func::ShuffleDown>, - &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, - - &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, - &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, + &GLSLDecompiler::ThreadId, + &GLSLDecompiler::ShuffleIndexed, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); @@ -2492,6 +2485,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { bvec2 is_nan2 = isnan(pair2); return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); } + +const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); +const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); )"; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0e22eede9..af17216bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -20,8 +20,7 @@ std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding + float y_direction; }; )"; @@ -35,23 +34,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { void main() { execute_vertex(); )"; - if (ir_b) { out += " execute_vertex_b();"; } - - out += R"( - - // Set Position Y direction - gl_Position.y *= utof(config_pack[2]); - // Check if the flip stage is VertexB - // Config pack's second value is flip_stage - if (config_pack[1] == 1) { - // Viewport can be flipped, which is unsupported by glViewport - gl_Position.xy *= viewport_flip.xy; - } -} -)"; + out += "}\n"; return out; } @@ -59,8 +45,7 @@ std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { std::string out = GetCommonDeclarations(); out += R"( layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding + float y_direction; }; )"; @@ -87,8 +72,7 @@ layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding + float y_direction; }; )"; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index b05f90f20..75d3fac04 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -40,27 +40,11 @@ void ProgramManager::UpdatePipeline() { old_state = current_state; } -void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) { +void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) { const auto& regs = maxwell.regs; - const auto& state = maxwell.state; - - // TODO(bunnei): Support more than one viewport - viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; - viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; - - instance_id = state.current_instance; - - // Assign in which stage the position has to be flipped - // (the last stage before the fragment shader). - constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); - if (maxwell.regs.shader_config[geometry_index].enable) { - flip_stage = geometry_index; - } else { - flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); - } // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f; + y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 6961e702a..3703e7018 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -18,17 +18,12 @@ namespace OpenGL::GLShader { /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at /// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. /// Not following that rule will cause problems on some AMD drivers. -struct MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage); - - alignas(16) GLvec4 viewport_flip; - struct alignas(16) { - GLuint instance_id; - GLuint flip_stage; - GLfloat y_direction; - }; +struct alignas(16) MaxwellUniformData { + void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); + + GLfloat y_direction; }; -static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index f25148362..ccbe5912e 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -410,6 +410,12 @@ void OpenGLState::ApplyAlphaTest() { } } +void OpenGLState::ApplyClipControl() { + if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) { + glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE); + } +} + void OpenGLState::ApplyTextures() { if (const auto update = UpdateArray(cur_state.textures, textures)) { glBindTextures(update->first, update->second, textures.data() + update->first); @@ -453,6 +459,7 @@ void OpenGLState::Apply() { ApplyImages(); ApplyPolygonOffset(); ApplyAlphaTest(); + ApplyClipControl(); } void OpenGLState::EmulateViewportWithScissor() { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index cca25206b..eaff22bda 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -146,6 +146,10 @@ public: std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE + struct { + GLenum origin = GL_LOWER_LEFT; + } clip_control; + OpenGLState(); /// Get the currently active OpenGL state @@ -182,6 +186,7 @@ public: void ApplyDepthClamp(); void ApplyPolygonOffset(); void ApplyAlphaTest(); + void ApplyClipControl(); /// Resets any references to the given resource OpenGLState& UnbindTexture(GLuint handle); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 42cf068b6..2850d5b59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -783,6 +783,11 @@ private: return {}; } + Id FSwizzleAdd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HNegate(Operation operation) { UNIMPLEMENTED(); return {}; @@ -1195,42 +1200,12 @@ private: return {}; } - Id ShuffleIndexed(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleUp(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleDown(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id ShuffleButterfly(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleIndexed(Operation) { + Id ThreadId(Operation) { UNIMPLEMENTED(); return {}; } - Id InRangeShuffleUp(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleDown(Operation) { - UNIMPLEMENTED(); - return {}; - } - - Id InRangeShuffleButterfly(Operation) { + Id ShuffleIndexed(Operation) { UNIMPLEMENTED(); return {}; } @@ -1393,6 +1368,7 @@ private: &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, + &SPIRVDecompiler::FSwizzleAdd, &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, @@ -1528,15 +1504,8 @@ private: &SPIRVDecompiler::VoteAny, &SPIRVDecompiler::VoteEqual, + &SPIRVDecompiler::ThreadId, &SPIRVDecompiler::ShuffleIndexed, - &SPIRVDecompiler::ShuffleUp, - &SPIRVDecompiler::ShuffleDown, - &SPIRVDecompiler::ShuffleButterfly, - - &SPIRVDecompiler::InRangeShuffleIndexed, - &SPIRVDecompiler::InRangeShuffleUp, - &SPIRVDecompiler::InRangeShuffleDown, - &SPIRVDecompiler::InRangeShuffleButterfly, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; using Tegra::Shader::VoteOperation; namespace { + OperationCode GetOperationCode(VoteOperation vote_op) { switch (vote_op) { case VoteOperation::All: @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { return OperationCode::VoteAll; } } + } // Anonymous namespace u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { @@ -46,50 +48,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::SHFL: { - Node width = [this, instr] { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - - // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has - // been done reversing Nvidia's math. It won't work on all cases due to SHFL having - // different parameters that don't properly map to GLSL's interface, but it should work - // for cases emitted by Nvidia's compiler. - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation( - OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), - Immediate(8)); - } else { - return Operation(OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, Immediate(0x201F), - Operation(OperationCode::INegate, std::move(mask))), - Immediate(8)); - } - }(); + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); + + Node thread_id = Operation(OperationCode::ThreadId); + Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); + Node seg_mask = BitfieldExtract(mask, 8, 16); - const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { + Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); + Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); + Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, + Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); + + Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { switch (instr.shfl.operation) { case ShuffleOperation::Idx: - return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; - case ShuffleOperation::Up: - return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + return Operation(OperationCode::IBitwiseOr, + Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), + min_thread_id); case ShuffleOperation::Down: - return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + return Operation(OperationCode::IAdd, thread_id, index); + case ShuffleOperation::Up: + return Operation(OperationCode::IAdd, thread_id, + Operation(OperationCode::INegate, index)); case ShuffleOperation::Bfly: - return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + return Operation(OperationCode::IBitwiseXor, thread_id, index); } - UNREACHABLE_MSG("Invalid SHFL operation: {}", - static_cast<u64>(instr.shfl.operation.Value())); - return {}; + UNREACHABLE(); + return Immediate(0U); }(); - // Setting the predicate before the register is intentional to avoid overwriting. - Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { + if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); + } else { + return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); + } + }(); + + SetPredicate(bb, instr.shfl.pred48, in_bounds); SetRegister( bb, instr.gpr0, - Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); + break; + } + case OpCode::Id::FSWZADD: { + UNIMPLEMENTED_IF(instr.fswzadd.ndv); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); break; } default: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4300d9ff4..54217e6a4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -47,6 +47,7 @@ enum class OperationCode { FTrunc, /// (MetaArithmetic, float a) -> float FCastInteger, /// (MetaArithmetic, int a) -> float FCastUInteger, /// (MetaArithmetic, uint a) -> float + FSwizzleAdd, /// (float a, float b, uint mask) -> float IAdd, /// (MetaArithmetic, int a, int b) -> int IMul, /// (MetaArithmetic, int a, int b) -> int @@ -181,15 +182,8 @@ enum class OperationCode { VoteAny, /// (bool) -> bool VoteEqual, /// (bool) -> bool - ShuffleIndexed, /// (uint value, uint index, uint width) -> uint - ShuffleUp, /// (uint value, uint index, uint width) -> uint - ShuffleDown, /// (uint value, uint index, uint width) -> uint - ShuffleButterfly, /// (uint value, uint index, uint width) -> uint - - InRangeShuffleIndexed, /// (uint index, uint width) -> bool - InRangeShuffleUp, /// (uint index, uint width) -> bool - InRangeShuffleDown, /// (uint index, uint width) -> bool - InRangeShuffleButterfly, /// (uint index, uint width) -> bool + ThreadId, /// () -> uint + ShuffleIndexed, /// (uint value, uint index) -> uint Amount, }; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 621136b6e..4b6846113 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -249,6 +249,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::RGBA16U; case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::RGBA16F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RGBA16UI; default: break; } |
