aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp19
-rw-r--r--src/video_core/shader/decode/memory.cpp34
-rw-r--r--src/video_core/shader/decode/other.cpp8
-rw-r--r--src/video_core/shader/decode/texture.cpp166
-rw-r--r--src/video_core/shader/decode/warp.cpp3
-rw-r--r--src/video_core/shader/node.h27
-rw-r--r--src/video_core/shader/shader_ir.h11
-rw-r--r--src/video_core/shader/track.cpp1
8 files changed, 215 insertions, 54 deletions
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index a33d242e9..371fae127 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -130,6 +130,25 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
+ case OpCode::Id::FLO_R:
+ case OpCode::Id::FLO_C:
+ case OpCode::Id::FLO_IMM: {
+ Node value;
+ if (instr.flo.invert) {
+ op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+ }
+ if (instr.flo.is_signed) {
+ value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
+ } else {
+ value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
+ }
+ if (instr.flo.sh) {
+ value =
+ Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
+ }
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 335d78146..78e92f52e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -21,6 +21,7 @@ using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
namespace {
+
u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::Single:
@@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
return 1;
}
}
+
} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
- u64 next_element = instr.attribute.fmt20.element;
- auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+ u64 element = instr.attribute.fmt20.element;
+ auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
- const auto StoreNextElement = [&](u32 reg_offset) {
- const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
- next_element, GetRegister(instr.gpr39));
+ const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+ for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+ Node dest;
+ if (instr.attribute.fmt20.patch) {
+ const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
+ dest = MakeNode<PatchNode>(offset);
+ } else {
+ dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
+ GetRegister(instr.gpr39));
+ }
const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
bb.push_back(Operation(OperationCode::Assign, dest, src));
- // Load the next attribute element into the following register. If the element
- // to load goes beyond the vec4 size, load the first element of the next
- // attribute.
- next_element = (next_element + 1) % 4;
- next_index = next_index + (next_element == 0 ? 1 : 0);
- };
-
- const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
- for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
- StoreNextElement(reg_offset);
+ // Load the next attribute element into the following register. If the element to load
+ // goes beyond the vec4 size, load the first element of the next attribute.
+ element = (element + 1) % 4;
+ index = index + (element == 0 ? 1 : 0);
}
-
break;
}
case OpCode::Id::ST_L:
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 17cd45d3c..7321698b2 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case OpCode::Id::MOV_SYS: {
const Node value = [this, instr] {
switch (instr.sys20) {
+ case SystemVariable::InvocationId:
+ return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
@@ -255,6 +257,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
break;
}
+ case OpCode::Id::MEMBAR: {
+ UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
+ UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
+ bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+ break;
+ }
case OpCode::Id::DEPBAR: {
LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
break;
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index b094e5a06..994c05611 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -107,8 +107,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::TLD4S: {
- UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
+ const bool uses_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
+ UNIMPLEMENTED_IF_MSG(uses_aoffi, "AOFFI is not implemented");
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
@@ -116,29 +116,86 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
+ Node dc_reg;
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
- coords.push_back(op_b);
+ dc_reg = uses_aoffi ? GetRegister(instr.gpr20.Value() + 1) : op_b;
} else {
coords.push_back(op_a);
- coords.push_back(op_b);
+ if (uses_aoffi) {
+ const Node op_y = GetRegister(instr.gpr8.Value() + 1);
+ coords.push_back(op_y);
+ } else {
+ coords.push_back(op_b);
+ }
+ dc_reg = {};
}
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
const SamplerInfo info{TextureType::Texture2D, false, depth_compare};
- const auto& sampler = GetSampler(instr.sampler, info);
+ const Sampler& sampler = *GetSampler(instr.sampler, info);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
+ MetaTexture meta{sampler, {}, dc_reg, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
- WriteTexsInstructionFloat(bb, instr, values, true);
+ if (instr.tld4s.fp16_flag) {
+ WriteTexsInstructionHalfFloat(bb, instr, values, true);
+ } else {
+ WriteTexsInstructionFloat(bb, instr, values, true);
+ }
+ break;
+ }
+ case OpCode::Id::TXD_B:
+ is_bindless = true;
+ [[fallthrough]];
+ case OpCode::Id::TXD: {
+ UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented");
+
+ u64 base_reg = instr.gpr8.Value();
+ const auto derivate_reg = instr.gpr20.Value();
+ const auto texture_type = instr.txd.texture_type.Value();
+ const auto coord_count = GetCoordCount(texture_type);
+
+ const Sampler* sampler = is_bindless
+ ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
+ : GetSampler(instr.sampler, {{texture_type, false, false}});
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ WriteTexInstructionFloat(bb, instr, values);
+ break;
+ }
+ if (is_bindless) {
+ base_reg++;
+ }
+
+ std::vector<Node> coords;
+ std::vector<Node> derivates;
+ for (std::size_t i = 0; i < coord_count; ++i) {
+ coords.push_back(GetRegister(base_reg + i));
+ const std::size_t derivate = i * 2;
+ derivates.push_back(GetRegister(derivate_reg + derivate));
+ derivates.push_back(GetRegister(derivate_reg + derivate + 1));
+ }
+
+ for (u32 element = 0; element < values.size(); ++element) {
+ MetaTexture meta{*sampler, {}, {}, {}, derivates, {}, {}, {}, element};
+ values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
+ }
+
+ WriteTexInstructionFloat(bb, instr, values);
+
break;
}
case OpCode::Id::TXQ_B:
@@ -148,9 +205,24 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
- const auto& sampler =
+ const Sampler* sampler =
is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);
+ if (sampler == nullptr) {
+ u32 indexer = 0;
+ for (u32 element = 0; element < 4; ++element) {
+ if (!instr.txq.IsComponentEnabled(element)) {
+ continue;
+ }
+ const Node value = Immediate(0);
+ SetTemporary(bb, indexer++, value);
+ }
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
+ break;
+ }
+
u32 indexer = 0;
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
@@ -158,7 +230,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -184,9 +256,24 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
- const auto& sampler =
+ const Sampler* sampler =
is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);
+ if (sampler == nullptr) {
+ u32 indexer = 0;
+ for (u32 element = 0; element < 2; ++element) {
+ if (!instr.tmml.IsComponentEnabled(element)) {
+ continue;
+ }
+ const Node value = Immediate(0);
+ SetTemporary(bb, indexer++, value);
+ }
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
+ break;
+ }
+
std::vector<Node> coords;
// TODO: Add coordinates for different samplers once other texture types are implemented.
@@ -212,7 +299,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
continue;
}
auto params = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporary(bb, indexer++, value);
}
@@ -268,7 +355,7 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
sampler->is_buffer != 0};
}
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
std::optional<SamplerInfo> sampler_info) {
const auto offset = static_cast<u32>(sampler.index.Value());
const auto info = GetSamplerInfo(sampler_info, offset);
@@ -280,21 +367,24 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
if (it != used_samplers.end()) {
ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
- return *it;
+ return &(*it);
}
// Otherwise create a new mapping for this sampler
const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
- info.is_buffer);
+ return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
+ info.is_buffer);
}
-const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
const auto [base_sampler, buffer, offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
+ if (base_sampler == nullptr) {
+ return nullptr;
+ }
const auto info = GetSamplerInfo(sampler_info, offset, buffer);
@@ -307,13 +397,13 @@ const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
if (it != used_samplers.end()) {
ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
it->IsShadow() == info.is_shadow);
- return *it;
+ return &(*it);
}
// Otherwise create a new mapping for this sampler
const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
- info.is_shadow, info.is_buffer);
+ return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
+ info.is_shadow, info.is_buffer);
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -356,14 +446,14 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const
}
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
- const Node4& components) {
+ const Node4& components, bool ignore_mask) {
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
// float instruction).
Node4 values;
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component))
+ if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
continue;
values[dest_elem++] = components[component];
}
@@ -399,8 +489,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
"This method is not supported.");
const SamplerInfo info{texture_type, is_array, is_shadow, false};
- const auto& sampler =
+ const Sampler* sampler =
is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ return values;
+ }
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
@@ -439,10 +536,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
}
}
- Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
- MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
+ MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -555,8 +651,15 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
u64 parameter_register = instr.gpr20.Value();
const SamplerInfo info{texture_type, is_array, depth_compare, false};
- const auto& sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
- : GetSampler(instr.sampler, info);
+ const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
+ : GetSampler(instr.sampler, info);
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ return values;
+ }
std::vector<Node> aoffi;
if (is_aoffi) {
@@ -571,10 +674,9 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
: Immediate(static_cast<u32>(instr.tld4.component));
- Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
+ MetaTexture meta{*sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -603,12 +705,12 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
- const auto& sampler = GetSampler(instr.sampler);
+ const auto& sampler = *GetSampler(instr.sampler);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
@@ -616,7 +718,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
- const auto& sampler = GetSampler(instr.sampler);
+ const Sampler& sampler = *GetSampler(instr.sampler);
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
@@ -653,7 +755,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index d98d0e1dd..11b77f795 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -38,6 +38,9 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
+ // Signal the backend that this shader uses warp instructions.
+ uses_warps = true;
+
switch (opcode->get().GetId()) {
case OpCode::Id::VOTE: {
const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 44d85d434..abd40f582 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -68,6 +68,7 @@ enum class OperationCode {
IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int
IBitCount, /// (MetaArithmetic, int) -> int
+ IBitMSB, /// (MetaArithmetic, int) -> int
UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
UMul, /// (MetaArithmetic, uint a, uint b) -> uint
@@ -86,6 +87,7 @@ enum class OperationCode {
UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
UBitCount, /// (MetaArithmetic, uint) -> uint
+ UBitMSB, /// (MetaArithmetic, uint) -> uint
HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
@@ -149,6 +151,7 @@ enum class OperationCode {
TextureQueryDimensions, /// (MetaTexture, float a) -> float4
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
+ TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
ImageLoad, /// (MetaImage, int[N] coords) -> void
ImageStore, /// (MetaImage, int[N] coords) -> void
@@ -169,6 +172,7 @@ enum class OperationCode {
EmitVertex, /// () -> void
EndPrimitive, /// () -> void
+ InvocationId, /// () -> int
YNegate, /// () -> float
LocalInvocationIdX, /// () -> uint
LocalInvocationIdY, /// () -> uint
@@ -185,6 +189,8 @@ enum class OperationCode {
ThreadId, /// () -> uint
ShuffleIndexed, /// (uint value, uint index) -> uint
+ MemoryBarrierGL, /// () -> void
+
Amount,
};
@@ -210,13 +216,14 @@ class PredicateNode;
class AbufNode;
class CbufNode;
class LmemNode;
+class PatchNode;
class SmemNode;
class GmemNode;
class CommentNode;
-using NodeData =
- std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
- PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
+ InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
+ LmemNode, SmemNode, GmemNode, CommentNode>;
using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
@@ -367,6 +374,7 @@ struct MetaTexture {
Node array;
Node depth_compare;
std::vector<Node> aoffi;
+ std::vector<Node> derivates;
Node bias;
Node lod;
Node component{};
@@ -538,6 +546,19 @@ private:
u32 element{};
};
+/// Patch memory (used to communicate tessellation stages).
+class PatchNode final {
+public:
+ explicit PatchNode(u32 offset) : offset{offset} {}
+
+ u32 GetOffset() const {
+ return offset;
+ }
+
+private:
+ u32 offset{};
+};
+
/// Constant buffer node, usually mapped to uniform buffers in GLSL
class CbufNode final {
public:
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 2f71a50d2..04ae5f822 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -137,6 +137,10 @@ public:
return uses_vertex_id;
}
+ bool UsesWarps() const {
+ return uses_warps;
+ }
+
bool HasPhysicalAttributes() const {
return uses_physical_attributes;
}
@@ -309,11 +313,11 @@ private:
std::optional<u32> buffer = std::nullopt);
/// Accesses a texture sampler
- const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
+ const Sampler* GetSampler(const Tegra::Shader::Sampler& sampler,
std::optional<SamplerInfo> sampler_info = std::nullopt);
/// Accesses a texture sampler for a bindless texture.
- const Sampler& GetBindlessSampler(Tegra::Shader::Register reg,
+ const Sampler* GetBindlessSampler(Tegra::Shader::Register reg,
std::optional<SamplerInfo> sampler_info = std::nullopt);
/// Accesses an image.
@@ -334,7 +338,7 @@ private:
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components, bool ignore_mask = false);
void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components);
+ const Node4& components, bool ignore_mask = false);
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@@ -415,6 +419,7 @@ private:
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
bool uses_instance_id{};
bool uses_vertex_id{};
+ bool uses_warps{};
Tegra::Shader::Header header;
};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 55f5949e4..165c79330 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -7,6 +7,7 @@
#include <variant>
#include "common/common_types.h"
+#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {