aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp126
-rw-r--r--src/video_core/shader/const_buffer_locker.h103
-rw-r--r--src/video_core/shader/control_flow.cpp13
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp22
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp33
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp94
-rw-r--r--src/video_core/shader/decode/bfe.cpp69
-rw-r--r--src/video_core/shader/decode/texture.cpp5
-rw-r--r--src/video_core/shader/decode/xmad.cpp68
-rw-r--r--src/video_core/shader/node.h2
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/registry.cpp161
-rw-r--r--src/video_core/shader/registry.h137
-rw-r--r--src/video_core/shader/shader_ir.cpp90
-rw-r--r--src/video_core/shader/shader_ir.h14
-rw-r--r--src/video_core/shader/track.cpp38
-rw-r--r--src/video_core/shader/transform_feedback.cpp115
-rw-r--r--src/video_core/shader/transform_feedback.h23
19 files changed, 713 insertions, 405 deletions
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
deleted file mode 100644
index 0638be8cb..000000000
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <tuple>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/const_buffer_locker.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Engines::SamplerDescriptor;
-
-ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
- : stage{shader_stage} {}
-
-ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine)
- : stage{shader_stage}, engine{&engine} {}
-
-ConstBufferLocker::~ConstBufferLocker() = default;
-
-std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
- const std::pair<u32, u32> key = {buffer, offset};
- const auto iter = keys.find(key);
- if (iter != keys.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
- keys.emplace(key, value);
- return value;
-}
-
-std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
- const u32 key = offset;
- const auto iter = bound_samplers.find(key);
- if (iter != bound_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
- bound_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
- u32 buffer, u32 offset) {
- const std::pair key = {buffer, offset};
- const auto iter = bindless_samplers.find(key);
- if (iter != bindless_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
- bindless_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
- if (bound_buffer_saved) {
- return bound_buffer;
- }
- if (!engine) {
- return std::nullopt;
- }
- bound_buffer_saved = true;
- bound_buffer = engine->GetBoundBuffer();
- return bound_buffer;
-}
-
-void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
- keys.insert_or_assign({buffer, offset}, value);
-}
-
-void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
- bound_samplers.insert_or_assign(offset, sampler);
-}
-
-void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
- bindless_samplers.insert_or_assign({buffer, offset}, sampler);
-}
-
-void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
- bound_buffer_saved = true;
- bound_buffer = buffer;
-}
-
-bool ConstBufferLocker::IsConsistent() const {
- if (!engine) {
- return false;
- }
- return std::all_of(keys.begin(), keys.end(),
- [this](const auto& pair) {
- const auto [cbuf, offset] = pair.first;
- const auto value = pair.second;
- return value == engine->AccessConstBuffer32(stage, cbuf, offset);
- }) &&
- std::all_of(bound_samplers.begin(), bound_samplers.end(),
- [this](const auto& sampler) {
- const auto [key, value] = sampler;
- return value == engine->AccessBoundSampler(stage, key);
- }) &&
- std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
- [this](const auto& sampler) {
- const auto [cbuf, offset] = sampler.first;
- const auto value = sampler.second;
- return value == engine->AccessBindlessSampler(stage, cbuf, offset);
- });
-}
-
-bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
- return std::tie(keys, bound_samplers, bindless_samplers) ==
- std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
deleted file mode 100644
index d3ea11087..000000000
--- a/src/video_core/shader/const_buffer_locker.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <unordered_map>
-#include "common/common_types.h"
-#include "common/hash.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCommon::Shader {
-
-using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
-using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
-using BindlessSamplerMap =
- std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
-
-/**
- * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
- * compiler. with it, the shader can obtain required data from GPU state and store it for disk
- * shader compilation.
- */
-class ConstBufferLocker {
-public:
- explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
-
- explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine);
-
- ~ConstBufferLocker();
-
- /// Retrieves a key from the locker, if it's registered, it will give the registered value, if
- /// not it will obtain it from maxwell3d and register it.
- std::optional<u32> ObtainKey(u32 buffer, u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
-
- std::optional<u32> ObtainBoundBuffer();
-
- /// Inserts a key.
- void InsertKey(u32 buffer, u32 offset, u32 value);
-
- /// Inserts a bound sampler key.
- void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Inserts a bindless sampler key.
- void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Set the bound buffer for this locker.
- void SetBoundBuffer(u32 buffer);
-
- /// Checks keys and samplers against engine's current const buffers. Returns true if they are
- /// the same value, false otherwise;
- bool IsConsistent() const;
-
- /// Returns true if the keys are equal to the other ones in the locker.
- bool HasEqualKeys(const ConstBufferLocker& rhs) const;
-
- /// Gives an getter to the const buffer keys in the database.
- const KeyMap& GetKeys() const {
- return keys;
- }
-
- /// Gets samplers database.
- const BoundSamplerMap& GetBoundSamplers() const {
- return bound_samplers;
- }
-
- /// Gets bindless samplers database.
- const BindlessSamplerMap& GetBindlessSamplers() const {
- return bindless_samplers;
- }
-
- /// Gets bound buffer used on this shader
- u32 GetBoundBuffer() const {
- return bound_buffer;
- }
-
- /// Obtains access to the guest driver's profile.
- VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
- if (engine) {
- return &engine->AccessGuestDriverProfile();
- }
- return nullptr;
- }
-
-private:
- const Tegra::Engines::ShaderType stage;
- Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
- KeyMap keys;
- BoundSamplerMap bound_samplers;
- BindlessSamplerMap bindless_samplers;
- bool bound_buffer_saved{};
- u32 bound_buffer{};
-};
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 0229733b6..2e2711350 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
#include "common/common_types.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/control_flow.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
};
struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
- : program_code{program_code}, locker{locker}, start{start} {}
+ explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
+ : program_code{program_code}, registry{registry}, start{start} {}
const ProgramCode& program_code;
- ConstBufferLocker& locker;
+ Registry& registry;
u32 start{};
std::vector<BlockInfo> block_info;
std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
const s32 pc_target = offset + result.relative_position;
std::vector<CaseBranch> branches;
for (u32 i = 0; i < result.entries; i++) {
- auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
if (!key) {
return {ParseResult::AbnormalFlow, parse_info};
}
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
- ConstBufferLocker& locker) {
+ Registry& registry) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
- CFGRebuildState state{program_code, start_address, locker};
+ CFGRebuildState state{program_code, start_address, registry};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5304998b9..62a3510d8 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -12,6 +12,7 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
- ConstBufferLocker& locker);
+ Registry& registry);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6b697ed5d..87ac9ac6c 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
-void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
+void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
- if (gpu_driver == nullptr) {
- LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
- return;
- }
- if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
+ if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
return;
}
u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
bound_offsets.emplace_back(sampler.GetOffset());
}
if (count > 1) {
- gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
+ gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
}
}
std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
- VideoCore::GuestDriverProfile* gpu_driver,
+ VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
- if (gpu_driver == nullptr) {
- LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
- return std::nullopt;
- }
const u32 base_offset = sampler_to_deduce.GetOffset();
u32 max_offset{std::numeric_limits<u32>::max()};
for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
if (max_offset == std::numeric_limits<u32>::max()) {
return std::nullopt;
}
- return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
+ return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
}
} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
- auto info = ScanFlow(program_code, main_offset, settings, locker);
+ auto info = ScanFlow(program_code, main_offset, settings, registry);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
void ShaderIR::PostDecode() {
// Deduce texture handler size if needed
- auto gpu_driver = locker.AccessGuestDriverProfile();
+ auto gpu_driver = registry.AccessGuestDriverProfile();
DeduceTextureHandlerSize(gpu_driver, used_samplers);
// Deduce Indexed Samplers
if (!uses_indexed_samplers) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 90240c765..478394682 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
- // TODO(Rodrigo): Should precise be used when there's a postfactor?
- Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+ static constexpr std::array FmulPostFactor = {
+ 1.000f, // None
+ 0.500f, // Divide 2
+ 0.250f, // Divide 4
+ 0.125f, // Divide 8
+ 8.000f, // Mul 8
+ 4.000f, // Mul 4
+ 2.000f, // Mul 2
+ };
if (instr.fmul.postfactor != 0) {
- auto postfactor = static_cast<s32>(instr.fmul.postfactor);
-
- // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
- // logic.
- if (postfactor >= 4) {
- postfactor = 7 - postfactor;
- } else {
- postfactor = 0 - postfactor;
- }
-
- if (postfactor > 0) {
- value = Operation(OperationCode::FMul, NO_PRECISE, value,
- Immediate(static_cast<f32>(1 << postfactor)));
- } else {
- value = Operation(OperationCode::FDiv, NO_PRECISE, value,
- Immediate(static_cast<f32>(1 << -postfactor)));
- }
+ op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
+ Immediate(FmulPostFactor[instr.fmul.postfactor]));
}
+ // TODO(Rodrigo): Should precise be used when there's a postfactor?
+ Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 21366869d..2fe787d6f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -293,44 +293,66 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
Node imm_lut, bool sets_cc) {
- constexpr u32 lop_iterations = 32;
- const Node one = Immediate(1);
- const Node two = Immediate(2);
-
- Node value;
- for (u32 i = 0; i < lop_iterations; ++i) {
- const Node shift_amount = Immediate(i);
-
- const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
- const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);
-
- const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
- const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
- const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
-
- const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
- const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
- const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
-
- const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1);
- const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2);
-
- const Node shifted_bit =
- Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
- const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
-
- const Node right =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
-
- if (i > 0) {
- value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
- } else {
- value = right;
+ const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
+ Node value = Immediate(0);
+ const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
+ if (imm.GetValue() & 0x01) {
+ const Node a = Operation(OperationCode::IBitwiseNot, na);
+ const Node b = Operation(OperationCode::IBitwiseNot, nb);
+ const Node c = Operation(OperationCode::IBitwiseNot, nc);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
}
- }
+ if (imm.GetValue() & 0x02) {
+ const Node a = Operation(OperationCode::IBitwiseNot, na);
+ const Node b = Operation(OperationCode::IBitwiseNot, nb);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x04) {
+ const Node a = Operation(OperationCode::IBitwiseNot, na);
+ const Node c = Operation(OperationCode::IBitwiseNot, nc);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x08) {
+ const Node a = Operation(OperationCode::IBitwiseNot, na);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x10) {
+ const Node b = Operation(OperationCode::IBitwiseNot, nb);
+ const Node c = Operation(OperationCode::IBitwiseNot, nc);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x20) {
+ const Node b = Operation(OperationCode::IBitwiseNot, nb);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x40) {
+ const Node c = Operation(OperationCode::IBitwiseNot, nc);
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ if (imm.GetValue() & 0x80) {
+ Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
+ r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
+ value = Operation(OperationCode::IBitwiseOr, value, r);
+ }
+ return value;
+ }(op_a, op_b, op_c, imm_lut);
- SetInternalFlagsFromInteger(bb, value, sets_cc);
- SetRegister(bb, dest, value);
+ SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
+ SetRegister(bb, dest, lop3_fast);
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index e02bcd097..8e3b46e8e 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- UNIMPLEMENTED_IF(instr.bfe.negate_b);
-
Node op_a = GetRegister(instr.gpr8);
- op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::BFE_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in BFE is not implemented");
+ Node op_b = [&] {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::BFE_R:
+ return GetRegister(instr.gpr20);
+ case OpCode::Id::BFE_C:
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ case OpCode::Id::BFE_IMM:
+ return Immediate(instr.alu.GetSignedImm20_20());
+ default:
+ UNREACHABLE();
+ return Immediate(0);
+ }
+ }();
- const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
- const Node outer_shift_imm =
- Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
+ UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
- const Node inner_shift =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
- const Node outer_shift =
- Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
+ const bool is_signed = instr.bfe.is_signed;
- SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
- SetRegister(bb, instr.gpr0, outer_shift);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
+ // using reverse parallel method in
+ // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+ // note for later if possible to implement faster method.
+ if (instr.bfe.brev) {
+ const auto swap = [&](u32 s, u32 mask) {
+ Node v1 =
+ SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
+ if (mask != 0) {
+ v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
+ Immediate(mask));
+ }
+ Node v2 = op_a;
+ if (mask != 0) {
+ v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
+ Immediate(mask));
+ }
+ v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
+ Immediate(s));
+ return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
+ std::move(v2));
+ };
+ op_a = swap(1, 0x55555555U);
+ op_a = swap(2, 0x33333333U);
+ op_a = swap(4, 0x0F0F0F0FU);
+ op_a = swap(8, 0x00FF00FFU);
+ op_a = swap(16, 0);
}
+ const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+ Immediate(0), Immediate(8));
+ const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+ Immediate(8), Immediate(8));
+ auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
+ SetRegister(bb, instr.gpr0, std::move(result));
+
return pc;
}
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index bee7d8cad..48350e042 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -12,6 +12,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
if (sampler_info) {
return *sampler_info;
}
- const auto sampler =
- buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset);
+ const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
+ : registry.ObtainBoundSampler(offset);
if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
return SamplerInfo{TextureType::Texture2D, false, false, false};
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 206961909..6191ffba1 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -12,6 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
@@ -30,7 +31,7 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
const bool is_signed_b = instr.xmad.sign_b == 1;
const bool is_signed_c = is_signed_a;
- auto [is_merge, is_psl, is_high_b, mode, op_b,
+ auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::XMAD_CR:
@@ -63,15 +64,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
}
}();
- op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
+ op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
+ instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
- const Node original_b = op_b;
- op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);
+ const Node original_b = op_b_binding;
+ const Node op_b =
+ SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
+ is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
- // TODO(Rodrigo): Use an appropiate sign for this operation
- Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
+ // we already check sign_a and sign_b is difference or not before so just use one in here.
+ Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
if (is_psl) {
- product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
+ product =
+ SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
}
SetTemporary(bb, 0, product);
product = GetTemporary(0);
@@ -88,12 +93,40 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
return BitfieldExtract(original_c, 16, 16);
case Tegra::Shader::XmadMode::CBcc: {
const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
- NO_PRECISE, original_b, Immediate(16));
- return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
- shifted_b);
+ original_b, Immediate(16));
+ return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
+ }
+ case Tegra::Shader::XmadMode::CSfu: {
+ const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a,
+ op_a, Immediate(0));
+ const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b,
+ op_b, Immediate(0));
+ const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
+
+ const Node comp_minus_a = GetPredicateComparisonInteger(
+ PredCondition::NotEqual, is_signed_a,
+ SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
+ Immediate(0x80000000)),
+ Immediate(0));
+ const Node comp_minus_b = GetPredicateComparisonInteger(
+ PredCondition::NotEqual, is_signed_b,
+ SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
+ Immediate(0x80000000)),
+ Immediate(0));
+
+ Node new_c = Operation(
+ OperationCode::Select, comp_minus_a,
+ SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
+ original_c);
+ new_c = Operation(
+ OperationCode::Select, comp_minus_b,
+ SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
+ std::move(new_c));
+
+ return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
}
default:
- UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
+ UNREACHABLE();
return Immediate(0);
}
}();
@@ -102,18 +135,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
op_c = GetTemporary(1);
// TODO(Rodrigo): Use an appropiate sign for this operation
- Node sum = Operation(OperationCode::IAdd, product, op_c);
+ Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
SetTemporary(bb, 2, sum);
sum = GetTemporary(2);
if (is_merge) {
- const Node a = BitfieldExtract(sum, 0, 16);
- const Node b =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
- sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
+ const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
+ Immediate(0), Immediate(16));
+ const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
+ Immediate(16));
+ sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
}
SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
- SetRegister(bb, instr.gpr0, sum);
+ SetRegister(bb, instr.gpr0, std::move(sum));
return pc;
}
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a0a7b9111..a1828546e 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -299,7 +299,7 @@ private:
u32 index{}; ///< Emulated index given for the this sampler.
u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
- u32 size{}; ///< Size of the sampler if indexed.
+ u32 size{1}; ///< Size of the sampler.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index b3dcd291c..76c56abb5 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
return OperationCode::UBitwiseXor;
case OperationCode::IBitwiseNot:
return OperationCode::UBitwiseNot;
+ case OperationCode::IBitfieldExtract:
+ return OperationCode::UBitfieldExtract;
case OperationCode::IBitfieldInsert:
return OperationCode::UBitfieldInsert;
case OperationCode::IBitCount:
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
new file mode 100644
index 000000000..af70b3f35
--- /dev/null
+++ b/src/video_core/shader/registry.cpp
@@ -0,0 +1,161 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+#include "video_core/shader/registry.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Engines::ConstBufferEngineInterface;
+using Tegra::Engines::SamplerDescriptor;
+using Tegra::Engines::ShaderType;
+
+namespace {
+
+GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
+ if (shader_stage == ShaderType::Compute) {
+ return {};
+ }
+ auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
+
+ GraphicsInfo info;
+ info.tfb_layouts = graphics.regs.tfb_layouts;
+ info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
+ info.primitive_topology = graphics.regs.draw.topology;
+ info.tessellation_primitive = graphics.regs.tess_mode.prim;
+ info.tessellation_spacing = graphics.regs.tess_mode.spacing;
+ info.tfb_enabled = graphics.regs.tfb_enabled;
+ info.tessellation_clockwise = graphics.regs.tess_mode.cw;
+ return info;
+}
+
+ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
+ if (shader_stage != ShaderType::Compute) {
+ return {};
+ }
+ auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
+ const auto& launch = compute.launch_description;
+
+ ComputeInfo info;
+ info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
+ info.local_memory_size_in_words = launch.local_pos_alloc;
+ info.shared_memory_size_in_words = launch.shared_alloc;
+ return info;
+}
+
+} // Anonymous namespace
+
+Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
+ : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
+ bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
+
+Registry::Registry(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine)
+ : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
+ graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
+ shader_stage, engine)} {}
+
+Registry::~Registry() = default;
+
+std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
+ const std::pair<u32, u32> key = {buffer, offset};
+ const auto iter = keys.find(key);
+ if (iter != keys.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
+ keys.emplace(key, value);
+ return value;
+}
+
+std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
+ const u32 key = offset;
+ const auto iter = bound_samplers.find(key);
+ if (iter != bound_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
+ bound_samplers.emplace(key, value);
+ return value;
+}
+
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
+ u32 offset) {
+ const std::pair key = {buffer, offset};
+ const auto iter = bindless_samplers.find(key);
+ if (iter != bindless_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
+ bindless_samplers.emplace(key, value);
+ return value;
+}
+
+void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
+ keys.insert_or_assign({buffer, offset}, value);
+}
+
+void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
+ bound_samplers.insert_or_assign(offset, sampler);
+}
+
+void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
+ bindless_samplers.insert_or_assign({buffer, offset}, sampler);
+}
+
+bool Registry::IsConsistent() const {
+ if (!engine) {
+ return true;
+ }
+ return std::all_of(keys.begin(), keys.end(),
+ [this](const auto& pair) {
+ const auto [cbuf, offset] = pair.first;
+ const auto value = pair.second;
+ return value == engine->AccessConstBuffer32(stage, cbuf, offset);
+ }) &&
+ std::all_of(bound_samplers.begin(), bound_samplers.end(),
+ [this](const auto& sampler) {
+ const auto [key, value] = sampler;
+ return value == engine->AccessBoundSampler(stage, key);
+ }) &&
+ std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
+ [this](const auto& sampler) {
+ const auto [cbuf, offset] = sampler.first;
+ const auto value = sampler.second;
+ return value == engine->AccessBindlessSampler(stage, cbuf, offset);
+ });
+}
+
+bool Registry::HasEqualKeys(const Registry& rhs) const {
+ return std::tie(keys, bound_samplers, bindless_samplers) ==
+ std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
+}
+
+const GraphicsInfo& Registry::GetGraphicsInfo() const {
+ ASSERT(stage != Tegra::Engines::ShaderType::Compute);
+ return graphics_info;
+}
+
+const ComputeInfo& Registry::GetComputeInfo() const {
+ ASSERT(stage == Tegra::Engines::ShaderType::Compute);
+ return compute_info;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
new file mode 100644
index 000000000..0c80d35fd
--- /dev/null
+++ b/src/video_core/shader/registry.h
@@ -0,0 +1,137 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <optional>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+
+#include "common/common_types.h"
+#include "common/hash.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
+
+namespace VideoCommon::Shader {
+
+using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
+using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using BindlessSamplerMap =
+ std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
+
+struct GraphicsInfo {
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+ std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
+ tfb_layouts{};
+ std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
+ Maxwell::PrimitiveTopology primitive_topology{};
+ Maxwell::TessellationPrimitive tessellation_primitive{};
+ Maxwell::TessellationSpacing tessellation_spacing{};
+ bool tfb_enabled = false;
+ bool tessellation_clockwise = false;
+};
+static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
+ std::is_standard_layout_v<GraphicsInfo>);
+
+struct ComputeInfo {
+ std::array<u32, 3> workgroup_size{};
+ u32 shared_memory_size_in_words = 0;
+ u32 local_memory_size_in_words = 0;
+};
+static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
+
+struct SerializedRegistryInfo {
+ VideoCore::GuestDriverProfile guest_driver_profile;
+ u32 bound_buffer = 0;
+ GraphicsInfo graphics;
+ ComputeInfo compute;
+};
+
+/**
+ * The Registry is a class use to interface the 3D and compute engines with the shader compiler.
+ * With it, the shader can obtain required data from GPU state and store it for disk shader
+ * compilation.
+ */
+class Registry {
+public:
+ explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
+
+ explicit Registry(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine);
+
+ ~Registry();
+
+ /// Retrieves a key from the registry, if it's registered, it will give the registered value, if
+ /// not it will obtain it from maxwell3d and register it.
+ std::optional<u32> ObtainKey(u32 buffer, u32 offset);
+
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
+
+ /// Inserts a key.
+ void InsertKey(u32 buffer, u32 offset, u32 value);
+
+ /// Inserts a bound sampler key.
+ void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Inserts a bindless sampler key.
+ void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Checks keys and samplers against engine's current const buffers.
+ /// Returns true if they are the same value, false otherwise.
+ bool IsConsistent() const;
+
+ /// Returns true if the keys are equal to the other ones in the registry.
+ bool HasEqualKeys(const Registry& rhs) const;
+
+ /// Returns graphics information from this shader
+ const GraphicsInfo& GetGraphicsInfo() const;
+
+ /// Returns compute information from this shader
+ const ComputeInfo& GetComputeInfo() const;
+
+ /// Gives an getter to the const buffer keys in the database.
+ const KeyMap& GetKeys() const {
+ return keys;
+ }
+
+ /// Gets samplers database.
+ const BoundSamplerMap& GetBoundSamplers() const {
+ return bound_samplers;
+ }
+
+ /// Gets bindless samplers database.
+ const BindlessSamplerMap& GetBindlessSamplers() const {
+ return bindless_samplers;
+ }
+
+ /// Gets bound buffer used on this shader
+ u32 GetBoundBuffer() const {
+ return bound_buffer;
+ }
+
+ /// Obtains access to the guest driver's profile.
+ VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
+ return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
+ }
+
+private:
+ const Tegra::Engines::ShaderType stage;
+ VideoCore::GuestDriverProfile stored_guest_driver_profile;
+ Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
+ KeyMap keys;
+ BoundSamplerMap bound_samplers;
+ BindlessSamplerMap bindless_samplers;
+ u32 bound_buffer;
+ GraphicsInfo graphics_info;
+ ComputeInfo compute_info;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 3a5d280a9..baf7188d2 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -11,6 +11,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
- ConstBufferLocker& locker)
- : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
+ Registry& registry)
+ : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
Decode();
PostDecode();
}
@@ -95,6 +96,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
}
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
+ MarkAttributeUsage(index, element);
used_input_attributes.emplace(index);
return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}
@@ -105,42 +107,8 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
}
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
- if (index == Attribute::Index::LayerViewportPointSize) {
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- break;
- case 1:
- uses_layer = true;
- break;
- case 2:
- uses_viewport_index = true;
- break;
- case 3:
- uses_point_size = true;
- break;
- }
- }
- if (index == Attribute::Index::TessCoordInstanceIDVertexID) {
- switch (element) {
- case 2:
- uses_instance_id = true;
- break;
- case 3:
- uses_vertex_id = true;
- break;
- default:
- break;
- }
- }
- if (index == Attribute::Index::ClipDistances0123 ||
- index == Attribute::Index::ClipDistances4567) {
- const auto clip_index =
- static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element);
- used_clip_distances.at(clip_index) = true;
- }
+ MarkAttributeUsage(index, element);
used_output_attributes.insert(index);
-
return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}
@@ -451,6 +419,54 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
Immediate(bits));
}
+void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
+ switch (index) {
+ case Attribute::Index::LayerViewportPointSize:
+ switch (element) {
+ case 0:
+ UNIMPLEMENTED();
+ break;
+ case 1:
+ uses_layer = true;
+ break;
+ case 2:
+ uses_viewport_index = true;
+ break;
+ case 3:
+ uses_point_size = true;
+ break;
+ }
+ break;
+ case Attribute::Index::TessCoordInstanceIDVertexID:
+ switch (element) {
+ case 2:
+ uses_instance_id = true;
+ break;
+ case 3:
+ uses_vertex_id = true;
+ break;
+ }
+ break;
+ case Attribute::Index::ClipDistances0123:
+ case Attribute::Index::ClipDistances4567: {
+ const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
+ used_clip_distances.at(clip_index) = true;
+ break;
+ }
+ case Attribute::Index::FrontColor:
+ case Attribute::Index::FrontSecondaryColor:
+ case Attribute::Index::BackColor:
+ case Attribute::Index::BackSecondaryColor:
+ uses_legacy_varyings = true;
+ break;
+ default:
+ if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
+ uses_legacy_varyings = true;
+ }
+ break;
+ }
+}
+
std::size_t ShaderIR::DeclareAmend(Node new_amend) {
const std::size_t id = amend_code.size();
amend_code.push_back(new_amend);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index b0851c3be..80fc9b82c 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,8 +18,8 @@
#include "video_core/engines/shader_header.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
+#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
- ConstBufferLocker& locker);
+ Registry& registry);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -137,6 +137,10 @@ public:
return uses_vertex_id;
}
+ bool UsesLegacyVaryings() const {
+ return uses_legacy_varyings;
+ }
+
bool UsesWarps() const {
return uses_warps;
}
@@ -343,6 +347,9 @@ private:
/// Inserts a sequence of bits from a node
Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
+ /// Marks the usage of a input or output attribute.
+ void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
+
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
@@ -414,7 +421,7 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
const CompilerSettings settings;
- ConstBufferLocker& locker;
+ Registry& registry;
bool decompiled{};
bool disable_flow_stack{};
@@ -443,6 +450,7 @@ private:
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
bool uses_instance_id{};
bool uses_vertex_id{};
+ bool uses_legacy_varyings{};
bool uses_warps{};
bool uses_indexed_samplers{};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index face8c943..10739b37d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
return {tracked, track};
} else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
- auto bound_buffer = locker.ObtainBoundBuffer();
- if (!bound_buffer) {
+ const u32 bound_buffer = registry.GetBoundBuffer();
+ if (bound_buffer != cbuf->GetIndex()) {
return {};
}
- if (*bound_buffer != cbuf->GetIndex()) {
- return {};
- }
- auto pair = DecoupleIndirectRead(*operation);
+ const auto pair = DecoupleIndirectRead(*operation);
if (!pair) {
return {};
}
auto [gpr, base_offset] = *pair;
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
- auto gpu_driver = locker.AccessGuestDriverProfile();
- if (gpu_driver == nullptr) {
- return {};
- }
+ const auto& gpu_driver = registry.AccessGuestDriverProfile();
const u32 bindless_cv = NewCustomVariable();
- const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
- Immediate(gpu_driver->GetTextureHandlerSize()));
+ const Node op =
+ Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
const Node cv_node = GetCustomVariable(bindless_cv);
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
@@ -157,13 +151,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
return {};
}
- // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
- // register that it uses as operand
- const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
- if (!source) {
- return {};
+ s64 current_cursor = cursor;
+ while (current_cursor > 0) {
+ // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
+ // register that it uses as operand
+ const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1);
+ current_cursor = new_cursor;
+ if (!source) {
+ continue;
+ }
+ const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor);
+ if (base_address != nullptr) {
+ return {base_address, index, offset};
+ }
}
- return TrackCbuf(source, code, new_cursor);
+ return {};
}
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
new file mode 100644
index 000000000..22a933761
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.cpp
@@ -0,0 +1,115 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/registry.h"
+#include "video_core/shader/transform_feedback.h"
+
+namespace VideoCommon::Shader {
+
+namespace {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
+
+/// Attribute offsets that describe a vector
+constexpr std::array VECTORS = {
+ 28, // gl_Position
+ 32, // Generic 0
+ 36, // Generic 1
+ 40, // Generic 2
+ 44, // Generic 3
+ 48, // Generic 4
+ 52, // Generic 5
+ 56, // Generic 6
+ 60, // Generic 7
+ 64, // Generic 8
+ 68, // Generic 9
+ 72, // Generic 10
+ 76, // Generic 11
+ 80, // Generic 12
+ 84, // Generic 13
+ 88, // Generic 14
+ 92, // Generic 15
+ 96, // Generic 16
+ 100, // Generic 17
+ 104, // Generic 18
+ 108, // Generic 19
+ 112, // Generic 20
+ 116, // Generic 21
+ 120, // Generic 22
+ 124, // Generic 23
+ 128, // Generic 24
+ 132, // Generic 25
+ 136, // Generic 26
+ 140, // Generic 27
+ 144, // Generic 28
+ 148, // Generic 29
+ 152, // Generic 30
+ 156, // Generic 31
+ 160, // gl_FrontColor
+ 164, // gl_FrontSecondaryColor
+ 160, // gl_BackColor
+ 164, // gl_BackSecondaryColor
+ 192, // gl_TexCoord[0]
+ 196, // gl_TexCoord[1]
+ 200, // gl_TexCoord[2]
+ 204, // gl_TexCoord[3]
+ 208, // gl_TexCoord[4]
+ 212, // gl_TexCoord[5]
+ 216, // gl_TexCoord[6]
+ 220, // gl_TexCoord[7]
+};
+} // namespace
+
+std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
+
+ std::unordered_map<u8, VaryingTFB> tfb;
+
+ for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
+ const auto& locations = info.tfb_varying_locs[buffer];
+ const auto& layout = info.tfb_layouts[buffer];
+ const std::size_t varying_count = layout.varying_count;
+
+ std::size_t highest = 0;
+
+ for (std::size_t offset = 0; offset < varying_count; ++offset) {
+ const std::size_t base_offset = offset;
+ const u8 location = locations[offset];
+
+ VaryingTFB varying;
+ varying.buffer = layout.stream;
+ varying.stride = layout.stride;
+ varying.offset = offset * sizeof(u32);
+ varying.components = 1;
+
+ if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
+ UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+ const u8 base_index = location / 4;
+ while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+ ++offset;
+ ++varying.components;
+ }
+ }
+
+ [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
+ UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
+
+ highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
+ }
+
+ UNIMPLEMENTED_IF(highest != layout.stride);
+ }
+ return tfb;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
new file mode 100644
index 000000000..77d05f64c
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.h
@@ -0,0 +1,23 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "common/common_types.h"
+#include "video_core/shader/registry.h"
+
+namespace VideoCommon::Shader {
+
+struct VaryingTFB {
+ std::size_t buffer;
+ std::size_t stride;
+ std::size_t offset;
+ std::size_t components;
+};
+
+std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
+
+} // namespace VideoCommon::Shader