8 files changed, 150 insertions, 72 deletions
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 2fe787d6f..0f4c3103a 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -235,34 +235,30 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
     case OpCode::Id::LEA_IMM:
     case OpCode::Id::LEA_RZ:
     case OpCode::Id::LEA_HI: {
-        const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
+        auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
             switch (opcode->get().GetId()) {
             case OpCode::Id::LEA_R2: {
                 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
                         Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
             }
-
             case OpCode::Id::LEA_R1: {
                 const bool neg = instr.lea.r1.neg != 0;
                 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         GetRegister(instr.gpr20),
                         Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
             }
-
             case OpCode::Id::LEA_IMM: {
                 const bool neg = instr.lea.imm.neg != 0;
                 return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
                         GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
             }
-
             case OpCode::Id::LEA_RZ: {
                 const bool neg = instr.lea.rz.neg != 0;
                 return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
                         GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
             }
-
             case OpCode::Id::LEA_HI:
             default:
                 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
@@ -275,12 +271,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                              "Unhandled LEA Predicate");
 
-        const Node shifted_c =
-            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
-        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
-        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
-
-        SetRegister(bb, instr.gpr0, value);
+        Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c));
+        value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value));
+        SetRegister(bb, instr.gpr0, std::move(value));
 
         break;
     }
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 6ead42070..c72690b2b 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -138,18 +138,23 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
 
         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
 
-        value = [&]() {
+        value = [&] {
+            if (instr.conversion.src_size != instr.conversion.dst_size) {
+                // Rounding operations only matter when the source and destination conversion
+                // sizes are the same.
+                return value;
+            }
             switch (instr.conversion.f2f.GetRoundingMode()) {
             case Tegra::Shader::F2fRoundingOp::None:
                 return value;
             case Tegra::Shader::F2fRoundingOp::Round:
-                return Operation(OperationCode::FRoundEven, PRECISE, value);
+                return Operation(OperationCode::FRoundEven, value);
             case Tegra::Shader::F2fRoundingOp::Floor:
-                return Operation(OperationCode::FFloor, PRECISE, value);
+                return Operation(OperationCode::FFloor, value);
             case Tegra::Shader::F2fRoundingOp::Ceil:
-                return Operation(OperationCode::FCeil, PRECISE, value);
+                return Operation(OperationCode::FCeil, value);
             case Tegra::Shader::F2fRoundingOp::Trunc:
-                return Operation(OperationCode::FTrunc, PRECISE, value);
+                return Operation(OperationCode::FTrunc, value);
             default:
                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                                   static_cast<u32>(instr.conversion.f2f.rounding.Value()));
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index b5fbc4d58..28a49addd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -19,7 +19,6 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
-using Tegra::Shader::GlobalAtomicOp;
 using Tegra::Shader::GlobalAtomicType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
@@ -28,6 +27,28 @@ using Tegra::Shader::StoreType;
 
 namespace {
 
+Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) {
+    const OperationCode operation_code = [op] { // Always yields a signed (AtomicI*) opcode.
+        switch (op) {
+        case AtomicOp::Min:
+            return OperationCode::AtomicIMin;
+        case AtomicOp::Max:
+            return OperationCode::AtomicIMax;
+        case AtomicOp::And:
+            return OperationCode::AtomicIAnd;
+        case AtomicOp::Or:
+            return OperationCode::AtomicIOr;
+        case AtomicOp::Xor:
+            return OperationCode::AtomicIXor;
+        case AtomicOp::Exch:
+            return OperationCode::AtomicIExchange;
+        default: // Add; unhandled ops also map here rather than falling off the end.
+            return OperationCode::AtomicIAdd;
+        }
+    }();
+    return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
+}
+
 bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
     return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
            uniform_type == Tegra::Shader::UniformType::UnsignedShort;
@@ -363,10 +384,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::ATOM: {
-        UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
-                             static_cast<int>(instr.atom.operation.Value()));
-        UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
-                             static_cast<int>(instr.atom.type.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
+                                 instr.atom.operation == AtomicOp::Dec ||
+                                 instr.atom.operation == AtomicOp::SafeAdd,
+                             "operation={}", static_cast<int>(instr.atom.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
+                                 instr.atom.type == GlobalAtomicType::U64,
+                             "type={}", static_cast<int>(instr.atom.type.Value()));
 
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
@@ -375,25 +399,29 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             break;
         }
 
+        const bool is_signed =
+            instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-        Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
+        Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
+                                      GetRegister(instr.gpr20));
         SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
     case OpCode::Id::ATOMS: {
-        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
-                             static_cast<int>(instr.atoms.operation.Value()));
-        UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
-                             static_cast<int>(instr.atoms.type.Value()));
-
+        UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
+                                 instr.atoms.operation == AtomicOp::Dec,
+                             "operation={}", static_cast<int>(instr.atoms.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
+                                 instr.atoms.type == AtomicType::U64,
+                             "type={}", static_cast<int>(instr.atoms.type.Value()));
+        const bool is_signed =
+            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
         const s32 offset = instr.atoms.GetImmediateOffset();
         Node address = GetRegister(instr.gpr8);
         address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
-
-        Node memory = GetSharedMemory(std::move(address));
-        Node data = GetRegister(instr.gpr20);
-
-        Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
+        Node value =
+            GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
+                             GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
         SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index fbd7e9a17..6191ffba1 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -31,7 +31,7 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
     const bool is_signed_b = instr.xmad.sign_b == 1;
     const bool is_signed_c = is_signed_a;
 
-    auto [is_merge, is_psl, is_high_b, mode, op_b,
+    auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
           op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
         switch (opcode->get().GetId()) {
         case OpCode::Id::XMAD_CR:
@@ -67,9 +67,10 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
     op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
                            instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
 
-    const Node original_b = op_b;
-    op_b = SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b),
-                           is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
+    const Node original_b = op_b_binding;
+    const Node op_b =
+        SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
+                        is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
 
     // we already check sign_a and sign_b is difference or not before so just use one in here.
     Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a1828546e..5fcc9da60 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,7 +162,21 @@ enum class OperationCode {
     AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
     AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
 
-    AtomicAdd, /// (memory, {u}int) -> {u}int
+    AtomicUExchange, /// (memory, uint) -> uint
+    AtomicUAdd,      /// (memory, uint) -> uint
+    AtomicUMin,      /// (memory, uint) -> uint
+    AtomicUMax,      /// (memory, uint) -> uint
+    AtomicUAnd,      /// (memory, uint) -> uint
+    AtomicUOr,       /// (memory, uint) -> uint
+    AtomicUXor,      /// (memory, uint) -> uint
+
+    AtomicIExchange, /// (memory, int) -> int
+    AtomicIAdd,      /// (memory, int) -> int
+    AtomicIMin,      /// (memory, int) -> int
+    AtomicIMax,      /// (memory, int) -> int
+    AtomicIAnd,      /// (memory, int) -> int
+    AtomicIOr,       /// (memory, int) -> int
+    AtomicIXor,      /// (memory, int) -> int
 
     Branch,         /// (uint branch_target) -> void
     BranchIndirect, /// (uint branch_target) -> void
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 76c56abb5..7bf4ff387 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -86,6 +86,20 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
         return OperationCode::LogicalUNotEqual;
     case OperationCode::LogicalIGreaterEqual:
         return OperationCode::LogicalUGreaterEqual;
+    case OperationCode::AtomicIExchange:
+        return OperationCode::AtomicUExchange;
+    case OperationCode::AtomicIAdd:
+        return OperationCode::AtomicUAdd;
+    case OperationCode::AtomicIMin:
+        return OperationCode::AtomicUMin;
+    case OperationCode::AtomicIMax:
+        return OperationCode::AtomicUMax;
+    case OperationCode::AtomicIAnd:
+        return OperationCode::AtomicUAnd;
+    case OperationCode::AtomicIOr:
+        return OperationCode::AtomicUOr;
+    case OperationCode::AtomicIXor:
+        return OperationCode::AtomicUXor;
     case OperationCode::INegate:
         UNREACHABLE_MSG("Can't negate an unsigned integer");
         return {};
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 425927777..baf7188d2 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -96,6 +96,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
 }
 
 Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
+    MarkAttributeUsage(index, element);
     used_input_attributes.emplace(index);
     return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
 }
@@ -106,42 +107,8 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
 }
 
 Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
-    if (index == Attribute::Index::LayerViewportPointSize) {
-        switch (element) {
-        case 0:
-            UNIMPLEMENTED();
-            break;
-        case 1:
-            uses_layer = true;
-            break;
-        case 2:
-            uses_viewport_index = true;
-            break;
-        case 3:
-            uses_point_size = true;
-            break;
-        }
-    }
-    if (index == Attribute::Index::TessCoordInstanceIDVertexID) {
-        switch (element) {
-        case 2:
-            uses_instance_id = true;
-            break;
-        case 3:
-            uses_vertex_id = true;
-            break;
-        default:
-            break;
-        }
-    }
-    if (index == Attribute::Index::ClipDistances0123 ||
-        index == Attribute::Index::ClipDistances4567) {
-        const auto clip_index =
-            static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element);
-        used_clip_distances.at(clip_index) = true;
-    }
+    MarkAttributeUsage(index, element);
     used_output_attributes.insert(index);
-
     return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
 }
 
@@ -452,6 +419,54 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
                      Immediate(bits));
 }
 
+void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
+    switch (index) { // Each case only records usage flags on this ShaderIR.
+    case Attribute::Index::LayerViewportPointSize:
+        switch (element) {
+        case 0: // Element 0 is not handled yet.
+            UNIMPLEMENTED();
+            break;
+        case 1:
+            uses_layer = true;
+            break;
+        case 2:
+            uses_viewport_index = true;
+            break;
+        case 3:
+            uses_point_size = true;
+            break;
+        }
+        break;
+    case Attribute::Index::TessCoordInstanceIDVertexID:
+        switch (element) { // Other elements set no flag.
+        case 2:
+            uses_instance_id = true;
+            break;
+        case 3:
+            uses_vertex_id = true;
+            break;
+        }
+        break;
+    case Attribute::Index::ClipDistances0123:
+    case Attribute::Index::ClipDistances4567: { // The second group is offset by 4 below.
+        const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
+        used_clip_distances.at(clip_index) = true;
+        break;
+    }
+    case Attribute::Index::FrontColor:
+    case Attribute::Index::FrontSecondaryColor:
+    case Attribute::Index::BackColor:
+    case Attribute::Index::BackSecondaryColor:
+        uses_legacy_varyings = true;
+        break;
+    default: // TexCoord_0..TexCoord_7 are matched by range comparison; anything else is ignored.
+        if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
+            uses_legacy_varyings = true;
+        }
+        break;
+    }
+}
+
 std::size_t ShaderIR::DeclareAmend(Node new_amend) {
     const std::size_t id = amend_code.size();
     amend_code.push_back(new_amend);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index dde036b40..80fc9b82c 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -137,6 +137,10 @@ public:
         return uses_vertex_id;
     }
 
+    bool UsesLegacyVaryings() const {
+        return uses_legacy_varyings; // Set when a color or TexCoord attribute is marked used.
+    }
+
     bool UsesWarps() const {
         return uses_warps;
     }
@@ -343,6 +347,9 @@ private:
     /// Inserts a sequence of bits from a node
     Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
 
+    /// Marks the usage of an input or output attribute.
+    void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
+
     void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                   const Node4& components);
 
@@ -443,6 +450,7 @@ private:
     bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
     bool uses_instance_id{};
     bool uses_vertex_id{};
+    bool uses_legacy_varyings{};
     bool uses_warps{};
     bool uses_indexed_samplers{};