Diffstat (limited to 'src/video_core/shader')
-rw-r--r--  src/video_core/shader/decode/image.cpp   | 104
-rw-r--r--  src/video_core/shader/decode/memory.cpp  |  48
-rw-r--r--  src/video_core/shader/decode/shift.cpp   |  19
-rw-r--r--  src/video_core/shader/decode/warp.cpp    |  47
-rw-r--r--  src/video_core/shader/node.h             | 109
-rw-r--r--  src/video_core/shader/shader_ir.cpp      |   9
-rw-r--r--  src/video_core/shader/shader_ir.h        |  20
7 files changed, 279 insertions, 77 deletions
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 77151a24b..d54fb88c9 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::SUST: {
UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
- UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
@@ -61,56 +60,105 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
}
const auto type{instr.sust.image_type};
- const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
- : GetBindlessImage(instr.gpr39, type)};
+ auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
+ : GetBindlessImage(instr.gpr39, type)};
+ image.MarkWrite();
+
MetaImage meta{image, values};
- const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
- bb.push_back(store);
+ bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords)));
+ break;
+ }
+ case OpCode::Id::SUATOM: {
+ UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
+
+ Node value = GetRegister(instr.gpr0);
+
+ std::vector<Node> coords;
+ const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
+ for (std::size_t i = 0; i < num_coords; ++i) {
+ coords.push_back(GetRegister(instr.gpr8.Value() + i));
+ }
+
+ const OperationCode operation_code = [instr] {
+ switch (instr.suatom_d.operation) {
+ case Tegra::Shader::ImageAtomicOperation::Add:
+ return OperationCode::AtomicImageAdd;
+ case Tegra::Shader::ImageAtomicOperation::Min:
+ return OperationCode::AtomicImageMin;
+ case Tegra::Shader::ImageAtomicOperation::Max:
+ return OperationCode::AtomicImageMax;
+ case Tegra::Shader::ImageAtomicOperation::And:
+ return OperationCode::AtomicImageAnd;
+ case Tegra::Shader::ImageAtomicOperation::Or:
+ return OperationCode::AtomicImageOr;
+ case Tegra::Shader::ImageAtomicOperation::Xor:
+ return OperationCode::AtomicImageXor;
+ case Tegra::Shader::ImageAtomicOperation::Exch:
+ return OperationCode::AtomicImageExchange;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation={}",
+ static_cast<u32>(instr.suatom_d.operation.Value()));
+ return OperationCode::AtomicImageAdd;
+ }
+ }();
+
+ const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)};
+ MetaImage meta{image, {std::move(value)}};
+ SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords)));
break;
}
default:
- UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
+ UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
}
return pc;
}
-const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
+Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
const auto offset{static_cast<std::size_t>(image.index.Value())};
-
- // If this image has already been used, return the existing mapping.
- const auto itr{std::find_if(used_images.begin(), used_images.end(),
- [=](const Image& entry) { return entry.GetOffset() == offset; })};
- if (itr != used_images.end()) {
- ASSERT(itr->GetType() == type);
- return *itr;
+ if (const auto image = TryUseExistingImage(offset, type, size)) {
+ return *image;
}
- // Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
- const Image entry{offset, next_index, type};
- return *used_images.emplace(entry).first;
+ return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second;
}
-const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
- Tegra::Shader::ImageType type) {
+Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
const Node image_register{GetRegister(reg)};
const auto [base_image, cbuf_index, cbuf_offset]{
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
- // If this image has already been used, return the existing mapping.
- const auto itr{std::find_if(used_images.begin(), used_images.end(),
- [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })};
- if (itr != used_images.end()) {
- ASSERT(itr->GetType() == type);
- return *itr;
+ if (const auto image = TryUseExistingImage(cbuf_key, type, size)) {
+ return *image;
}
- // Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
- const Image entry{cbuf_index, cbuf_offset, next_index, type};
- return *used_images.emplace(entry).first;
+ return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size})
+ .first->second;
+}
+
+Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
+ auto it = used_images.find(offset);
+ if (it == used_images.end()) {
+ return nullptr;
+ }
+ auto& image = it->second;
+ ASSERT(image.GetType() == type);
+
+ if (size) {
+ // A size is provided here: if the image already has a known size it must match, otherwise record it.
+ if (image.IsSizeKnown()) {
+ ASSERT(image.GetSize() == size);
+ } else {
+ image.SetSize(*size);
+ }
+ }
+ return &image;
}
} // namespace VideoCommon::Shader
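
With this change, used_images is keyed by a 64-bit value (the immediate image offset, or a packed cbuf_index/cbuf_offset for bindless images), and an entry created without an atomic size can pick one up on a later use. Below is a minimal standalone sketch of that find-or-insert-with-reconciliation pattern; ImageEntry and UseImage are hypothetical stand-ins, not the real ShaderIR members.

    // Sketch only: hypothetical stand-in types for the used_images bookkeeping above.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <optional>

    enum class ImageType { Texture1D, Texture2D, Texture3D };
    enum class ImageAtomicSize { U32, S32 };

    struct ImageEntry {
        std::size_t index{};
        ImageType type{};
        std::optional<ImageAtomicSize> size{};
    };

    // Key: immediate offset, or (u64{cbuf_index} << 32) | cbuf_offset for bindless images.
    std::map<std::uint64_t, ImageEntry> used_images;

    ImageEntry& UseImage(std::uint64_t key, ImageType type, std::optional<ImageAtomicSize> size) {
        if (const auto it = used_images.find(key); it != used_images.end()) {
            ImageEntry& image = it->second;
            assert(image.type == type);
            if (size) {
                // Reconcile the atomic size: it must match a previously recorded one,
                // otherwise record it now.
                if (image.size) {
                    assert(*image.size == *size);
                } else {
                    image.size = size;
                }
            }
            return image;
        }
        const std::size_t next_index = used_images.size();
        return used_images.emplace(key, ImageEntry{next_index, type, size}).first->second;
    }

    int main() {
        UseImage(0, ImageType::Texture2D, std::nullopt);                        // first use, size unknown
        auto& image = UseImage(0, ImageType::Texture2D, ImageAtomicSize::U32);  // atomic use
        assert(image.size == ImageAtomicSize::U32);                             // size reconciled
    }
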
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ed108bea8..7923d4d69 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -35,7 +35,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
return 1;
}
}
-} // namespace
+} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
@@ -106,16 +106,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
- case OpCode::Id::LD_L: {
- LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}",
- static_cast<u64>(instr.ld_l.unknown.Value()));
-
- const auto GetLmem = [&](s32 offset) {
+ case OpCode::Id::LD_L:
+ LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
+ [[fallthrough]];
+ case OpCode::Id::LD_S: {
+ const auto GetMemory = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
immediate_offset);
- return GetLocalMemory(address);
+ return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address)
+ : GetLocalMemory(address);
};
switch (instr.ldst_sl.type.Value()) {
@@ -135,14 +136,16 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return 0;
}
}();
- for (u32 i = 0; i < count; ++i)
- SetTemporary(bb, i, GetLmem(i * 4));
- for (u32 i = 0; i < count; ++i)
+ for (u32 i = 0; i < count; ++i) {
+ SetTemporary(bb, i, GetMemory(i * 4));
+ }
+ for (u32 i = 0; i < count; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
break;
}
default:
- UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
+ UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
static_cast<u32>(instr.ldst_sl.type.Value()));
}
break;
@@ -209,27 +212,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
- case OpCode::Id::ST_L: {
+ case OpCode::Id::ST_L:
LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}",
static_cast<u64>(instr.st_l.cache_management.Value()));
-
- const auto GetLmemAddr = [&](s32 offset) {
+ [[fallthrough]];
+ case OpCode::Id::ST_S: {
+ const auto GetAddress = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
};
+ const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L
+ ? &ShaderIR::SetLocalMemory
+ : &ShaderIR::SetSharedMemory;
+
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bits128:
- SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
- SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
+ (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
+ (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
+ [[fallthrough]];
case Tegra::Shader::StoreType::Bits64:
- SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
+ (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
+ [[fallthrough]];
case Tegra::Shader::StoreType::Bits32:
- SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
+ (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
default:
- UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
+ UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
static_cast<u32>(instr.ldst_sl.type.Value()));
}
break;
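
The merged ST_L/ST_S path above shares one store sequence by picking a pointer-to-member function and calling it through this->*, with deliberate [[fallthrough]]s so a 128-bit store also emits the 64- and 32-bit parts. A small self-contained sketch of that dispatch mechanism follows; Writer and its members are hypothetical, not the ShaderIR API.

    #include <iostream>

    class Writer {
    public:
        void Run(bool shared) {
            // Select the member function once, then call it through this->*.
            void (Writer::*set_memory)(int, int) =
                shared ? &Writer::SetSharedMemory : &Writer::SetLocalMemory;
            (this->*set_memory)(0, 123);
        }

    private:
        void SetLocalMemory(int address, int value) {
            std::cout << "local[" << address << "] = " << value << '\n';
        }
        void SetSharedMemory(int address, int value) {
            std::cout << "shared[" << address << "] = " << value << '\n';
        }
    };

    int main() {
        Writer{}.Run(true);  // prints: shared[0] = 123
        Writer{}.Run(false); // prints: local[0] = 123
    }
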
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 2ac16eeb0..f6ee68a54 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -17,8 +17,8 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- const Node op_a = GetRegister(instr.gpr8);
- const Node op_b = [&]() {
+ Node op_a = GetRegister(instr.gpr8);
+ Node op_b = [&]() {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
@@ -32,16 +32,23 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
case OpCode::Id::SHR_C:
case OpCode::Id::SHR_R:
case OpCode::Id::SHR_IMM: {
- const Node value = SignedOperation(OperationCode::IArithmeticShiftRight,
- instr.shift.is_signed, PRECISE, op_a, op_b);
+ if (instr.shr.wrap) {
+ op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f));
+ } else {
+ op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0));
+ op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31));
+ }
+
+ Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
+ std::move(op_a), std::move(op_b));
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
+ SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::SHL_C:
case OpCode::Id::SHL_R:
case OpCode::Id::SHL_IMM: {
- const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b);
+ const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
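
The SHR change models hardware shift-amount handling: with the wrap bit set the amount is masked to its low five bits (UBitwiseAnd with 0x1f), otherwise it is clamped to [0, 31] via the IMax/IMin pair. The scalar sketch below shows what those emitted nodes compute on plain integers; it is an illustration, not IR code.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    std::uint32_t ShiftAmount(std::int32_t b, bool wrap) {
        return wrap ? static_cast<std::uint32_t>(b) & 0x1f
                    : static_cast<std::uint32_t>(std::clamp(b, 0, 31));
    }

    int main() {
        assert(ShiftAmount(33, true) == 1);   // 33 & 0x1f
        assert(ShiftAmount(33, false) == 31); // clamped to the upper bound
        assert(ShiftAmount(-1, true) == 31);  // low five bits of 0xFFFFFFFF
        assert(ShiftAmount(-1, false) == 0);  // clamped to the lower bound
    }
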
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 04ca74f46..a8e481b3c 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -13,6 +13,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
+using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
@@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
SetPredicate(bb, instr.vote.dest_pred, vote);
break;
}
+ case OpCode::Id::SHFL: {
+ Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+ : GetRegister(instr.gpr39);
+ Node width = [&] {
+ // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This was
+ // derived by reversing Nvidia's math. It won't work in all cases because SHFL takes
+ // parameters that don't map cleanly to GLSL's interface, but it should cover the code
+ // emitted by Nvidia's compiler.
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(
+ OperationCode::ILogicalShiftRight,
+ Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
+ Immediate(8));
+ } else {
+ return Operation(OperationCode::ILogicalShiftRight,
+ Operation(OperationCode::IAdd, Immediate(0x201F),
+ Operation(OperationCode::INegate, std::move(mask))),
+ Immediate(8));
+ }
+ }();
+
+ const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ switch (instr.shfl.operation) {
+ case ShuffleOperation::Idx:
+ return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
+ case ShuffleOperation::Up:
+ return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ case ShuffleOperation::Down:
+ return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ case ShuffleOperation::Bfly:
+ return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ }
+ UNREACHABLE_MSG("Invalid SHFL operation: {}",
+ static_cast<u64>(instr.shfl.operation.Value()));
+ return {};
+ }();
+
+ // Setting the predicate before the register is intentional to avoid overwriting.
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
+ SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ SetRegister(
+ bb, instr.gpr0,
+ Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ break;
+ }
default:
UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
break;
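
The width recovery above computes (mask - 0x2000) >> 8 for ShuffleUp and (0x201F - mask) >> 8 for the other modes. The worked example below exercises that arithmetic; the sample masks are illustrative inputs consistent with the formulas, not values guaranteed to be emitted by Nvidia's compiler.

    #include <cassert>
    #include <cstdint>

    std::uint32_t WidthFromMask(std::uint32_t mask, bool is_up) {
        return is_up ? (mask + static_cast<std::uint32_t>(-0x2000)) >> 8
                     : (0x201fu - mask) >> 8;
    }

    int main() {
        assert(WidthFromMask(0x001f, false) == 32); // (0x201f - 0x001f) >> 8
        assert(WidthFromMask(0x101f, false) == 16); // (0x201f - 0x101f) >> 8
        assert(WidthFromMask(0x4000, true) == 32);  // (0x4000 - 0x2000) >> 8
        assert(WidthFromMask(0x3000, true) == 16);  // (0x3000 - 0x2000) >> 8
    }
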
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 5db9313c4..abf2cb1ab 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -7,6 +7,7 @@
#include <array>
#include <cstddef>
#include <memory>
+#include <optional>
#include <string>
#include <tuple>
#include <utility>
@@ -148,7 +149,14 @@ enum class OperationCode {
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
- ImageStore, /// (MetaImage, float[N] coords) -> void
+ ImageStore, /// (MetaImage, int[N] values) -> void
+ AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
+ AtomicImageMin, /// (MetaImage, int[N] coords) -> void
+ AtomicImageMax, /// (MetaImage, int[N] coords) -> void
+ AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
+ AtomicImageOr, /// (MetaImage, int[N] coords) -> void
+ AtomicImageXor, /// (MetaImage, int[N] coords) -> void
+ AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
@@ -173,6 +181,16 @@ enum class OperationCode {
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
+ ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
+ ShuffleUp, /// (uint value, uint index, uint width) -> uint
+ ShuffleDown, /// (uint value, uint index, uint width) -> uint
+ ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
+
+ InRangeShuffleIndexed, /// (uint index, uint width) -> bool
+ InRangeShuffleUp, /// (uint index, uint width) -> bool
+ InRangeShuffleDown, /// (uint index, uint width) -> bool
+ InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+
Amount,
};
@@ -198,12 +216,13 @@ class PredicateNode;
class AbufNode;
class CbufNode;
class LmemNode;
+class SmemNode;
class GmemNode;
class CommentNode;
using NodeData =
std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
- PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
+ PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
@@ -273,46 +292,85 @@ private:
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
};
-class Image {
+class Image final {
public:
- explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
- : offset{offset}, index{index}, type{type}, is_bindless{false} {}
+ constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
+ : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {}
- explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::ImageType type)
+ constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
+ Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
: offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_bindless{true} {}
+ is_bindless{true}, size{size} {}
- explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
- bool is_bindless)
- : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {}
+ constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
+ bool is_bindless, bool is_written, bool is_read,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
+ : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
+ is_written{is_written}, is_read{is_read}, size{size} {}
- std::size_t GetOffset() const {
+ void MarkWrite() {
+ is_written = true;
+ }
+
+ void MarkRead() {
+ is_read = true;
+ }
+
+ void SetSize(Tegra::Shader::ImageAtomicSize size_) {
+ size = size_;
+ }
+
+ constexpr std::size_t GetOffset() const {
return offset;
}
- std::size_t GetIndex() const {
+ constexpr std::size_t GetIndex() const {
return index;
}
- Tegra::Shader::ImageType GetType() const {
+ constexpr Tegra::Shader::ImageType GetType() const {
return type;
}
- bool IsBindless() const {
+ constexpr bool IsBindless() const {
return is_bindless;
}
- bool operator<(const Image& rhs) const {
- return std::tie(offset, index, type, is_bindless) <
- std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
+ constexpr bool IsWritten() const {
+ return is_written;
+ }
+
+ constexpr bool IsRead() const {
+ return is_read;
+ }
+
+ constexpr std::pair<u32, u32> GetBindlessCBuf() const {
+ return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
+ }
+
+ constexpr bool IsSizeKnown() const {
+ return size.has_value();
+ }
+
+ constexpr Tegra::Shader::ImageAtomicSize GetSize() const {
+ return size.value();
+ }
+
+ constexpr bool operator<(const Image& rhs) const {
+ return std::tie(offset, index, type, size, is_bindless) <
+ std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless);
}
private:
- std::size_t offset{};
+ u64 offset{};
std::size_t index{};
Tegra::Shader::ImageType type{};
bool is_bindless{};
+ bool is_written{};
+ bool is_read{};
+ std::optional<Tegra::Shader::ImageAtomicSize> size{};
};
struct GlobalMemoryBase {
@@ -536,6 +594,19 @@ private:
Node address;
};
+/// Shared memory node
+class SmemNode final {
+public:
+ explicit SmemNode(Node address) : address{std::move(address)} {}
+
+ const Node& GetAddress() const {
+ return address;
+ }
+
+private:
+ Node address;
+};
+
/// Global memory node
class GmemNode final {
public:
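
Image now stores the bindless constant-buffer index and offset packed into its single 64-bit offset field and exposes them again through GetBindlessCBuf(). A sketch of that packing and unpacking, with hypothetical free functions standing in for the constructor and accessor:

    #include <cassert>
    #include <cstdint>
    #include <utility>

    constexpr std::uint64_t PackBindless(std::uint32_t cbuf_index, std::uint32_t cbuf_offset) {
        return (static_cast<std::uint64_t>(cbuf_index) << 32) | cbuf_offset;
    }

    constexpr std::pair<std::uint32_t, std::uint32_t> UnpackBindless(std::uint64_t key) {
        return {static_cast<std::uint32_t>(key >> 32), static_cast<std::uint32_t>(key)};
    }

    int main() {
        static_assert(PackBindless(3, 0x140) == 0x3'0000'0140);
        const auto [index, offset] = UnpackBindless(PackBindless(3, 0x140));
        assert(index == 3 && offset == 0x140);
    }
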
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 1e5c7f660..bbbab0bca 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -137,6 +137,10 @@ Node ShaderIR::GetLocalMemory(Node address) {
return MakeNode<LmemNode>(std::move(address));
}
+Node ShaderIR::GetSharedMemory(Node address) {
+ return MakeNode<SmemNode>(std::move(address));
+}
+
Node ShaderIR::GetTemporary(u32 id) {
return GetRegister(Register::ZeroIndex + 1 + id);
}
@@ -378,6 +382,11 @@ void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
}
+void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
+ bb.push_back(
+ Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value)));
+}
+
void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index bcc9b79b6..6aed9bb84 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -95,7 +95,7 @@ public:
return used_samplers;
}
- const std::set<Image>& GetImages() const {
+ const std::map<u64, Image>& GetImages() const {
return used_images;
}
@@ -208,6 +208,8 @@ private:
Node GetInternalFlag(InternalFlag flag, bool negated = false);
/// Generates a node representing a local memory address
Node GetLocalMemory(Node address);
+ /// Generates a node representing a shared memory address
+ Node GetSharedMemory(Node address);
/// Generates a temporary, internally it uses a post-RZ register
Node GetTemporary(u32 id);
@@ -217,8 +219,10 @@ private:
void SetPredicate(NodeBlock& bb, u64 dest, Node src);
/// Sets an internal flag. src value must be a bool-evaluated node
void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
- /// Sets a local memory address. address and value must be a number-evaluated node
+ /// Sets a local memory address with a value.
void SetLocalMemory(NodeBlock& bb, Node address, Node value);
+ /// Sets a shared memory address with a value.
+ void SetSharedMemory(NodeBlock& bb, Node address, Node value);
/// Sets a temporary. Internally it uses a post-RZ register
void SetTemporary(NodeBlock& bb, u32 id, Node value);
@@ -272,10 +276,16 @@ private:
bool is_shadow);
/// Accesses an image.
- const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
+ Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size = {});
/// Access a bindless image sampler.
- const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
+ Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size = {});
+
+ /// Tries to access an existing image, updating its state as needed
+ Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -356,7 +366,7 @@ private:
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
- std::set<Image> used_images;
+ std::map<u64, Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_layer{};
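
Since GetImages() now returns std::map<u64, Image> rather than std::set<Image>, consumers iterate key/value pairs. A self-contained sketch of that consumer-side change, using a stand-in Image type rather than the real node.h class:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <map>

    struct Image {
        std::size_t index{};
        bool is_written{};
    };

    int main() {
        const std::map<std::uint64_t, Image> used_images{
            {0, Image{0, true}},
            {(std::uint64_t{2} << 32) | 0x40, Image{1, false}},
        };
        // Structured bindings replace the old set iteration.
        for (const auto& [offset, image] : used_images) {
            std::cout << "image #" << image.index << (image.is_written ? " (written)\n" : "\n");
        }
    }
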