about summary refs log tree commit diff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/shader_bytecode.h 438
-rw-r--r-- src/video_core/memory_manager.cpp 53
-rw-r--r-- src/video_core/memory_manager.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 1
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 23
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 14
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.h 39
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 184
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 3
-rw-r--r-- src/video_core/textures/decoders.cpp 3
-rw-r--r-- src/video_core/textures/texture.h 1
12 files changed, 491 insertions, 279 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7cd125f05..5a006aee5 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -4,15 +4,24 @@
#pragma once
+#include <bitset>
#include <cstring>
#include <map>
#include <string>
+#include <vector>
+
+#include <boost/optional.hpp>
+
#include "common/bit_field.h"
+#include "common/common_types.h"
namespace Tegra {
namespace Shader {
struct Register {
+ // Register 255 is special cased to always be 0
+ static constexpr size_t ZeroIndex = 255;
+
constexpr Register() = default;
constexpr Register(u64 value) : value(value) {}
@@ -86,181 +95,12 @@ union Uniform {
BitField<34, 5, u64> index;
};
-union OpCode {
- enum class Id : u64 {
- TEXS = 0x6C,
- IPA = 0xE0,
- FMUL32_IMM = 0x1E,
- FFMA_IMM = 0x65,
- FFMA_CR = 0x93,
- FFMA_RC = 0xA3,
- FFMA_RR = 0xB3,
-
- FADD_C = 0x98B,
- FMUL_C = 0x98D,
- MUFU = 0xA10,
- FADD_R = 0xB8B,
- FMUL_R = 0xB8D,
- LD_A = 0x1DFB,
- ST_A = 0x1DFE,
-
- FSETP_R = 0x5BB,
- FSETP_C = 0x4BB,
- EXIT = 0xE30,
- KIL = 0xE33,
-
- FMUL_IMM = 0x70D,
- FMUL_IMM_x = 0x72D,
- FADD_IMM = 0x70B,
- FADD_IMM_x = 0x72B,
- };
-
- enum class Type {
- Trivial,
- Arithmetic,
- Ffma,
- Flow,
- Memory,
- Unknown,
- };
-
- struct Info {
- Type type;
- std::string name;
- };
-
- OpCode() = default;
-
- constexpr OpCode(Id value) : value(static_cast<u64>(value)) {}
-
- constexpr OpCode(u64 value) : value{value} {}
-
- constexpr Id EffectiveOpCode() const {
- switch (op1) {
- case Id::TEXS:
- return op1;
- }
-
- switch (op2) {
- case Id::IPA:
- case Id::FMUL32_IMM:
- return op2;
- }
-
- switch (op3) {
- case Id::FFMA_IMM:
- case Id::FFMA_CR:
- case Id::FFMA_RC:
- case Id::FFMA_RR:
- return op3;
- }
-
- switch (op4) {
- case Id::EXIT:
- case Id::FSETP_R:
- case Id::FSETP_C:
- case Id::KIL:
- return op4;
- }
-
- switch (op5) {
- case Id::MUFU:
- case Id::LD_A:
- case Id::ST_A:
- case Id::FADD_R:
- case Id::FADD_C:
- case Id::FMUL_R:
- case Id::FMUL_C:
- return op5;
-
- case Id::FMUL_IMM:
- case Id::FMUL_IMM_x:
- return Id::FMUL_IMM;
-
- case Id::FADD_IMM:
- case Id::FADD_IMM_x:
- return Id::FADD_IMM;
- }
-
- return static_cast<Id>(value);
- }
-
- static const Info& GetInfo(const OpCode& opcode) {
- static const std::map<Id, Info> info_table{BuildInfoTable()};
- const auto& search{info_table.find(opcode.EffectiveOpCode())};
- if (search != info_table.end()) {
- return search->second;
- }
-
- static const Info unknown{Type::Unknown, "UNK"};
- return unknown;
- }
-
- constexpr operator Id() const {
- return static_cast<Id>(value);
- }
-
- constexpr OpCode operator<<(size_t bits) const {
- return value << bits;
- }
-
- constexpr OpCode operator>>(size_t bits) const {
- return value >> bits;
- }
-
- template <typename T>
- constexpr u64 operator-(const T& oth) const {
- return value - oth;
- }
-
- constexpr u64 operator&(const OpCode& oth) const {
- return value & oth.value;
- }
-
- constexpr u64 operator~() const {
- return ~value;
- }
-
- static std::map<Id, Info> BuildInfoTable() {
- std::map<Id, Info> info_table;
- info_table[Id::TEXS] = {Type::Memory, "texs"};
- info_table[Id::LD_A] = {Type::Memory, "ld_a"};
- info_table[Id::ST_A] = {Type::Memory, "st_a"};
- info_table[Id::MUFU] = {Type::Arithmetic, "mufu"};
- info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"};
- info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"};
- info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"};
- info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"};
- info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"};
- info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"};
- info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"};
- info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"};
- info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
- info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
- info_table[Id::FMUL32_IMM] = {Type::Arithmetic, "fmul32_imm"};
- info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
- info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
- info_table[Id::EXIT] = {Type::Trivial, "exit"};
- info_table[Id::IPA] = {Type::Trivial, "ipa"};
- info_table[Id::KIL] = {Type::Flow, "kil"};
- return info_table;
- }
-
- BitField<57, 7, Id> op1;
- BitField<56, 8, Id> op2;
- BitField<55, 9, Id> op3;
- BitField<52, 12, Id> op4;
- BitField<51, 13, Id> op5;
- u64 value{};
-};
-static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");
-
} // namespace Shader
} // namespace Tegra
namespace std {
-// TODO(bunne): The below is forbidden by the C++ standard, but works fine. See #330.
+// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
template <>
struct make_unsigned<Tegra::Shader::Attribute> {
using type = Tegra::Shader::Attribute;
@@ -271,11 +111,6 @@ struct make_unsigned<Tegra::Shader::Register> {
using type = Tegra::Shader::Register;
};
-template <>
-struct make_unsigned<Tegra::Shader::OpCode> {
- using type = Tegra::Shader::OpCode;
-};
-
} // namespace std
namespace Tegra {
@@ -283,7 +118,23 @@ namespace Shader {
enum class Pred : u64 {
UnusedIndex = 0x7,
- NeverExecute = 0xf,
+ NeverExecute = 0xF,
+};
+
+enum class PredCondition : u64 {
+ LessThan = 1,
+ Equal = 2,
+ LessEqual = 3,
+ GreaterThan = 4,
+ NotEqual = 5,
+ GreaterEqual = 6,
+ // TODO(Subv): Other condition types
+};
+
+enum class PredOperation : u64 {
+ And = 0,
+ Or = 1,
+ Xor = 2,
};
enum class SubOp : u64 {
@@ -298,18 +149,24 @@ enum class SubOp : u64 {
union Instruction {
Instruction& operator=(const Instruction& instr) {
- hex = instr.hex;
+ value = instr.value;
return *this;
}
- OpCode opcode;
+ constexpr Instruction(u64 value) : value{value} {}
+
BitField<0, 8, Register> gpr0;
BitField<8, 8, Register> gpr8;
- BitField<16, 4, Pred> pred;
+ union {
+ BitField<16, 4, Pred> full_pred;
+ BitField<16, 3, u64> pred_index;
+ } pred;
+ BitField<19, 1, u64> negate_pred;
BitField<20, 8, Register> gpr20;
BitField<20, 7, SubOp> sub_op;
BitField<28, 8, Register> gpr28;
BitField<39, 8, Register> gpr39;
+ BitField<48, 16, u64> opcode;
union {
BitField<20, 19, u64> imm20_19;
@@ -343,6 +200,20 @@ union Instruction {
BitField<49, 1, u64> negate_c;
} ffma;
+ union {
+ BitField<0, 3, u64> pred0;
+ BitField<3, 3, u64> pred3;
+ BitField<7, 1, u64> abs_a;
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, PredOperation> op;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, PredCondition> cond;
+ BitField<56, 1, u64> neg_b;
+ } fsetp;
+
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
@@ -351,11 +222,218 @@ union Instruction {
Uniform uniform;
Sampler sampler;
- u64 hex;
+ u64 value;
};
static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
static_assert(std::is_standard_layout<Instruction>::value,
"Structure does not have standard layout");
+class OpCode {
+public:
+ enum class Id {
+ KIL,
+ LD_A,
+ ST_A,
+ TEXQ, // Texture Query
+ TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
+ TLDS, // Texture Load with scalar/non-vec4 source/destinations
+ EXIT,
+ IPA,
+ FFMA_IMM, // Fused Multiply and Add
+ FFMA_CR,
+ FFMA_RC,
+ FFMA_RR,
+ FADD_C,
+ FADD_R,
+ FADD_IMM,
+ FMUL_C,
+ FMUL_R,
+ FMUL_IMM,
+ FMUL32_IMM,
+ MUFU, // Multi-Function Operator
+ RRO, // Range Reduction Operator
+ F2F_C,
+ F2F_R,
+ F2F_IMM,
+ F2I_C,
+ F2I_R,
+ F2I_IMM,
+ I2F_C,
+ I2F_R,
+ I2F_IMM,
+ LOP32I,
+ MOV_C,
+ MOV_R,
+ MOV_IMM,
+ MOV32I,
+ SHR_C,
+ SHR_R,
+ SHR_IMM,
+ FSETP_C, // Set Predicate
+ FSETP_R,
+ FSETP_IMM,
+ ISETP_C,
+ ISETP_IMM,
+ ISETP_R,
+ };
+
+ enum class Type {
+ Trivial,
+ Arithmetic,
+ Ffma,
+ Flow,
+ Memory,
+ FloatPredicate,
+ IntegerPredicate,
+ Unknown,
+ };
+
+ class Matcher {
+ public:
+ Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type)
+ : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
+
+ const char* GetName() const {
+ return name;
+ }
+
+ u16 GetMask() const {
+ return mask;
+ }
+
+ Id GetId() const {
+ return id;
+ }
+
+ Type GetType() const {
+ return type;
+ }
+
+ /**
+ * Tests to see if the given instruction is the instruction this matcher represents.
+ * @param instruction The instruction to test
+ * @returns true if the given instruction matches.
+ */
+ bool Matches(u16 instruction) const {
+ return (instruction & mask) == expected;
+ }
+
+ private:
+ const char* name;
+ u16 mask;
+ u16 expected;
+ Id id;
+ Type type;
+ };
+
+ static boost::optional<const Matcher&> Decode(Instruction instr) {
+ static const auto table{GetDecodeTable()};
+
+ const auto matches_instruction = [instr](const auto& matcher) {
+ return matcher.Matches(static_cast<u16>(instr.opcode));
+ };
+
+ auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
+ return iter != table.end() ? boost::optional<const Matcher&>(*iter) : boost::none;
+ }
+
+private:
+ struct Detail {
+ private:
+ static constexpr size_t opcode_bitsize = 16;
+
+ /**
+ * Generates the mask and the expected value after masking from a given bitstring.
+ * A '0' in a bitstring indicates that a zero must be present at that bit position.
+ * A '1' in a bitstring indicates that a one must be present at that bit position.
+ */
+ static auto GetMaskAndExpect(const char* const bitstring) {
+ u16 mask = 0, expect = 0;
+ for (size_t i = 0; i < opcode_bitsize; i++) {
+ const size_t bit_position = opcode_bitsize - i - 1;
+ switch (bitstring[i]) {
+ case '0':
+ mask |= 1 << bit_position;
+ break;
+ case '1':
+ expect |= 1 << bit_position;
+ mask |= 1 << bit_position;
+ break;
+ default:
+ // Ignore
+ break;
+ }
+ }
+ return std::make_tuple(mask, expect);
+ }
+
+ public:
+ /// Creates a matcher that can match and parse instructions based on bitstring.
+ static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type,
+ const char* const name) {
+ const auto mask_expect = GetMaskAndExpect(bitstring);
+ return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type);
+ }
+ };
+
+ static std::vector<Matcher> GetDecodeTable() {
+ std::vector<Matcher> table = {
+#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
+ INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
+ INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
+ INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+ INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
+ INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
+ INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
+ INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
+ INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
+ INST("001100101-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
+ INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
+ INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
+ INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
+ INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
+ INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
+ INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
+ INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
+ INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
+ INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
+ INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
+ INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
+ INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"),
+ INST("0100110010101---", Id::F2F_C, Type::Arithmetic, "F2F_C"),
+ INST("0101110010101---", Id::F2F_R, Type::Arithmetic, "F2F_R"),
+ INST("0011100-10101---", Id::F2F_IMM, Type::Arithmetic, "F2F_IMM"),
+ INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
+ INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
+ INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
+ INST("0100110010111---", Id::I2F_C, Type::Arithmetic, "I2F_C"),
+ INST("0101110010111---", Id::I2F_R, Type::Arithmetic, "I2F_R"),
+ INST("0011100-10111---", Id::I2F_IMM, Type::Arithmetic, "I2F_IMM"),
+ INST("000001----------", Id::LOP32I, Type::Arithmetic, "LOP32I"),
+ INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
+ INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
+ INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
+ INST("000000010000----", Id::MOV32I, Type::Arithmetic, "MOV32I"),
+ INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
+ INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
+ INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
+ INST("010010111011----", Id::FSETP_C, Type::FloatPredicate, "FSETP_C"),
+ INST("010110111011----", Id::FSETP_R, Type::FloatPredicate, "FSETP_R"),
+ INST("0011011-1011----", Id::FSETP_IMM, Type::FloatPredicate, "FSETP_IMM"),
+ INST("010010110110----", Id::ISETP_C, Type::IntegerPredicate, "ISETP_C"),
+ INST("010110110110----", Id::ISETP_R, Type::IntegerPredicate, "ISETP_R"),
+ INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerPredicate, "ISETP_IMM"),
+ };
+#undef INST
+ std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
+ // If a matcher has more bits in its mask it is more specific, so it
+ // should come first.
+ return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
+ });
+
+ return table;
+ }
+};
+
} // namespace Shader
} // namespace Tegra
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 2789a4ca1..2e1edee03 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/memory_manager.h"
@@ -11,7 +12,8 @@ PAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
boost::optional<PAddr> paddr = FindFreeBlock(size, align);
ASSERT(paddr);
- for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
+ for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
+ ASSERT(PageSlot(*paddr + offset) == static_cast<u64>(PageStatus::Unmapped));
PageSlot(*paddr + offset) = static_cast<u64>(PageStatus::Allocated);
}
@@ -19,13 +21,8 @@ PAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
}
PAddr MemoryManager::AllocateSpace(PAddr paddr, u64 size, u64 align) {
- for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
- if (IsPageMapped(paddr + offset)) {
- return AllocateSpace(size, align);
- }
- }
-
- for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
+ for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
+ ASSERT(PageSlot(paddr + offset) == static_cast<u64>(PageStatus::Unmapped));
PageSlot(paddr + offset) = static_cast<u64>(PageStatus::Allocated);
}
@@ -33,12 +30,11 @@ PAddr MemoryManager::AllocateSpace(PAddr paddr, u64 size, u64 align) {
}
PAddr MemoryManager::MapBufferEx(VAddr vaddr, u64 size) {
- vaddr &= ~Memory::PAGE_MASK;
-
- boost::optional<PAddr> paddr = FindFreeBlock(size);
+ boost::optional<PAddr> paddr = FindFreeBlock(size, PAGE_SIZE);
ASSERT(paddr);
- for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
+ for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
+ ASSERT(PageSlot(*paddr + offset) == static_cast<u64>(PageStatus::Unmapped));
PageSlot(*paddr + offset) = vaddr + offset;
}
@@ -46,16 +42,10 @@ PAddr MemoryManager::MapBufferEx(VAddr vaddr, u64 size) {
}
PAddr MemoryManager::MapBufferEx(VAddr vaddr, PAddr paddr, u64 size) {
- vaddr &= ~Memory::PAGE_MASK;
- paddr &= ~Memory::PAGE_MASK;
+ ASSERT((paddr & PAGE_MASK) == 0);
- for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
- if (PageSlot(paddr + offset) != static_cast<u64>(PageStatus::Allocated)) {
- return MapBufferEx(vaddr, size);
- }
- }
-
- for (u64 offset = 0; offset < size; offset += Memory::PAGE_SIZE) {
+ for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
+ ASSERT(PageSlot(paddr + offset) == static_cast<u64>(PageStatus::Allocated));
PageSlot(paddr + offset) = vaddr + offset;
}
@@ -63,23 +53,20 @@ PAddr MemoryManager::MapBufferEx(VAddr vaddr, PAddr paddr, u64 size) {
}
boost::optional<PAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
- PAddr paddr{};
- u64 free_space{};
- align = (align + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
+ PAddr paddr = 0;
+ u64 free_space = 0;
+ align = (align + PAGE_MASK) & ~PAGE_MASK;
while (paddr + free_space < MAX_ADDRESS) {
if (!IsPageMapped(paddr + free_space)) {
- free_space += Memory::PAGE_SIZE;
+ free_space += PAGE_SIZE;
if (free_space >= size) {
return paddr;
}
} else {
- paddr += free_space + Memory::PAGE_SIZE;
+ paddr += free_space + PAGE_SIZE;
free_space = 0;
- const u64 remainder{paddr % align};
- if (!remainder) {
- paddr = (paddr - remainder) + align;
- }
+ paddr = Common::AlignUp(paddr, align);
}
}
@@ -89,7 +76,7 @@ boost::optional<PAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
VAddr MemoryManager::PhysicalToVirtualAddress(PAddr paddr) {
VAddr base_addr = PageSlot(paddr);
ASSERT(base_addr != static_cast<u64>(PageStatus::Unmapped));
- return base_addr + (paddr & Memory::PAGE_MASK);
+ return base_addr + (paddr & PAGE_MASK);
}
bool MemoryManager::IsPageMapped(PAddr paddr) {
@@ -97,14 +84,14 @@ bool MemoryManager::IsPageMapped(PAddr paddr) {
}
VAddr& MemoryManager::PageSlot(PAddr paddr) {
- auto& block = page_table[(paddr >> (Memory::PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
+ auto& block = page_table[(paddr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
if (!block) {
block = std::make_unique<PageBlock>();
for (unsigned index = 0; index < PAGE_BLOCK_SIZE; index++) {
(*block)[index] = static_cast<u64>(PageStatus::Unmapped);
}
}
- return (*block)[(paddr >> Memory::PAGE_BITS) & PAGE_BLOCK_MASK];
+ return (*block)[(paddr >> PAGE_BITS) & PAGE_BLOCK_MASK];
}
} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 47da7acd6..b73e283f8 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -24,6 +24,10 @@ public:
PAddr MapBufferEx(VAddr vaddr, PAddr paddr, u64 size);
VAddr PhysicalToVirtualAddress(PAddr paddr);
+ static constexpr u64 PAGE_BITS = 16;
+ static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
+ static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
+
private:
boost::optional<PAddr> FindFreeBlock(u64 size, u64 align = 1);
bool IsPageMapped(PAddr paddr);
@@ -35,7 +39,7 @@ private:
};
static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
- static constexpr u64 PAGE_TABLE_BITS{14};
+ static constexpr u64 PAGE_TABLE_BITS{10};
static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
static constexpr u64 PAGE_BLOCK_BITS{14};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 170548528..2d4a0d6db 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -14,7 +14,6 @@
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
-#include "common/vector_math.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9ece415f7..03e02b52a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -6,15 +6,10 @@
#include <array>
#include <cstddef>
-#include <cstring>
#include <memory>
-#include <unordered_map>
#include <vector>
#include <glad/glad.h>
-#include "common/bit_field.h"
#include "common/common_types.h"
-#include "common/hash.h"
-#include "common/vector_math.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 6c1c6775a..7410471cc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -7,7 +7,6 @@
#include <cstring>
#include <iterator>
#include <memory>
-#include <unordered_set>
#include <utility>
#include <vector>
#include <boost/optional.hpp>
@@ -20,7 +19,6 @@
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
-#include "common/vector_math.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
@@ -51,6 +49,7 @@ struct FormatTuple {
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false, 1}, // A2B10G10R10
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45
@@ -106,9 +105,9 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr b
static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
- MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
- MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
- MortonCopy<true, PixelFormat::DXT45>,
+ MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
+ MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::DXT1>,
+ MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
@@ -116,6 +115,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
gl_to_morton_fns = {
MortonCopy<false, PixelFormat::ABGR8>,
MortonCopy<false, PixelFormat::B5G6R5>,
+ MortonCopy<false, PixelFormat::A2B10G10R10>,
// TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
nullptr,
nullptr,
@@ -672,7 +672,8 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
}
-enum MatchFlags {
+enum class MatchFlags {
+ None = 0,
Invalid = 1, // Flag that can be applied to other match types, invalid matches require
// validation before they can be used
Exact = 1 << 1, // Surfaces perfectly match
@@ -686,6 +687,10 @@ constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs));
}
+constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) {
+ return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs));
+}
+
/// Get the best surface match (and its match type) for the given flags
template <MatchFlags find_flags>
Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params,
@@ -703,15 +708,15 @@ Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params
: (params.res_scale <= surface->res_scale);
// validity will be checked in GetCopyableInterval
bool is_valid =
- find_flags & MatchFlags::Copy
+ (find_flags & MatchFlags::Copy) != MatchFlags::None
? true
: surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
- if (!(find_flags & MatchFlags::Invalid) && !is_valid)
+ if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid)
continue;
auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
- if (!(find_flags & check_type))
+ if ((find_flags & check_type) == MatchFlags::None)
return;
bool matched;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 6861efe16..bf0fabb29 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -54,9 +54,10 @@ struct SurfaceParams {
enum class PixelFormat {
ABGR8 = 0,
B5G6R5 = 1,
- DXT1 = 2,
- DXT23 = 3,
- DXT45 = 4,
+ A2B10G10R10 = 2,
+ DXT1 = 3,
+ DXT23 = 4,
+ DXT45 = 5,
Max,
Invalid = 255,
@@ -88,6 +89,7 @@ struct SurfaceParams {
constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = {
32, // ABGR8
16, // B5G6R5
+ 32, // A2B10G10R10
64, // DXT1
128, // DXT23
128, // DXT45
@@ -104,6 +106,8 @@ struct SurfaceParams {
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
return PixelFormat::ABGR8;
+ case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
+ return PixelFormat::A2B10G10R10;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -127,6 +131,8 @@ struct SurfaceParams {
return PixelFormat::ABGR8;
case Tegra::Texture::TextureFormat::B5G6R5:
return PixelFormat::B5G6R5;
+ case Tegra::Texture::TextureFormat::A2B10G10R10:
+ return PixelFormat::A2B10G10R10;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
@@ -146,6 +152,8 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::A8R8G8B8;
case PixelFormat::B5G6R5:
return Tegra::Texture::TextureFormat::B5G6R5;
+ case PixelFormat::A2B10G10R10:
+ return Tegra::Texture::TextureFormat::A2B10G10R10;
case PixelFormat::DXT1:
return Tegra::Texture::TextureFormat::DXT1;
case PixelFormat::DXT23:
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 2f0e7ac1a..93f9172e7 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -14,13 +14,13 @@ class OGLTexture : private NonCopyable {
public:
OGLTexture() = default;
- OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {}
+ OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLTexture() {
Release();
}
- OGLTexture& operator=(OGLTexture&& o) {
+ OGLTexture& operator=(OGLTexture&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
@@ -49,13 +49,13 @@ class OGLSampler : private NonCopyable {
public:
OGLSampler() = default;
- OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {}
+ OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLSampler() {
Release();
}
- OGLSampler& operator=(OGLSampler&& o) {
+ OGLSampler& operator=(OGLSampler&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
@@ -84,13 +84,13 @@ class OGLShader : private NonCopyable {
public:
OGLShader() = default;
- OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {}
+ OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLShader() {
Release();
}
- OGLShader& operator=(OGLShader&& o) {
+ OGLShader& operator=(OGLShader&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
@@ -118,13 +118,13 @@ class OGLProgram : private NonCopyable {
public:
OGLProgram() = default;
- OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {}
+ OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLProgram() {
Release();
}
- OGLProgram& operator=(OGLProgram&& o) {
+ OGLProgram& operator=(OGLProgram&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
@@ -165,13 +165,12 @@ public:
class OGLPipeline : private NonCopyable {
public:
OGLPipeline() = default;
- OGLPipeline(OGLPipeline&& o) {
- handle = std::exchange<GLuint>(o.handle, 0);
- }
+ OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {}
+
~OGLPipeline() {
Release();
}
- OGLPipeline& operator=(OGLPipeline&& o) {
+ OGLPipeline& operator=(OGLPipeline&& o) noexcept {
handle = std::exchange<GLuint>(o.handle, 0);
return *this;
}
@@ -199,13 +198,13 @@ class OGLBuffer : private NonCopyable {
public:
OGLBuffer() = default;
- OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {}
+ OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLBuffer() {
Release();
}
- OGLBuffer& operator=(OGLBuffer&& o) {
+ OGLBuffer& operator=(OGLBuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
@@ -234,12 +233,12 @@ class OGLSync : private NonCopyable {
public:
OGLSync() = default;
- OGLSync(OGLSync&& o) : handle(std::exchange(o.handle, nullptr)) {}
+ OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {}
~OGLSync() {
Release();
}
- OGLSync& operator=(OGLSync&& o) {
+ OGLSync& operator=(OGLSync&& o) noexcept {
Release();
handle = std::exchange(o.handle, nullptr);
return *this;
@@ -267,13 +266,13 @@ class OGLVertexArray : private NonCopyable {
public:
OGLVertexArray() = default;
- OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {}
+ OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLVertexArray() {
Release();
}
- OGLVertexArray& operator=(OGLVertexArray&& o) {
+ OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
@@ -302,13 +301,13 @@ class OGLFramebuffer : private NonCopyable {
public:
OGLFramebuffer() = default;
- OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {}
+ OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLFramebuffer() {
Release();
}
- OGLFramebuffer& operator=(OGLFramebuffer&& o) {
+ OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index de137558d..086424395 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -97,11 +97,12 @@ private:
return exit_method;
for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
- const Instruction instr = {program_code[offset]};
- switch (instr.opcode.EffectiveOpCode()) {
- case OpCode::Id::EXIT: {
- return exit_method = ExitMethod::AlwaysEnd;
- }
+ if (const auto opcode = OpCode::Decode({program_code[offset]})) {
+ switch (opcode->GetId()) {
+ case OpCode::Id::EXIT: {
+ return exit_method = ExitMethod::AlwaysEnd;
+ }
+ }
}
}
return exit_method = ExitMethod::AlwaysReturn;
@@ -220,6 +221,8 @@ private:
/// Generates code representing a temporary (GPR) register.
std::string GetRegister(const Register& reg, unsigned elem = 0) {
+ if (reg == Register::ZeroIndex)
+ return "0";
if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) {
// GPRs 0-3 are output color for the fragment shader
return std::string{"color."} + "rgba"[(reg + elem) & 3];
@@ -276,6 +279,52 @@ private:
shader.AddLine(dest + " = " + src + ";");
}
+ /**
+ * Writes code that assigns a predicate boolean variable.
+ * @param pred The id of the predicate to write to; must not be the constant predicate.
+ * @param value The expression value to assign to the predicate.
+ */
+ void SetPredicate(u64 pred, const std::string& value) {
+ using Tegra::Shader::Pred;
+ // Can't assign to the constant predicate.
+ ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
+
+ std::string variable = 'p' + std::to_string(pred);
+ shader.AddLine(variable + " = " + value + ';');
+ declr_predicates.insert(std::move(variable));
+ }
+
+ /**
+ * Returns the condition to use in the 'if' for a predicated instruction.
+ * @param instr Instruction to generate the if condition for.
+ * @returns The predicate variable name, wrapped in "!(...)" if the predicate is negated.
+ */
+ std::string GetPredicateCondition(Instruction instr) const {
+ using Tegra::Shader::Pred;
+ ASSERT(instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex));
+
+ std::string variable =
+ 'p' + std::to_string(static_cast<u64>(instr.pred.pred_index.Value()));
+
+ if (instr.negate_pred) {
+ return "!(" + variable + ')';
+ }
+
+ return variable;
+ }
+
+ /**
+ * Returns whether the instruction at the specified offset is a 'sched' instruction.
+ * Counting from main_offset, a sched instruction precedes each group of 3 instructions.
+ */
+ bool IsSchedInstruction(u32 offset) const {
+ // sched instructions appear once every 4 instructions.
+ static constexpr size_t SchedPeriod = 4;
+ u32 absolute_offset = offset - main_offset;
+
+ return (absolute_offset % SchedPeriod) == 0;
+ }
+
/**
* Compiles a single instruction from Tegra to GLSL.
* @param offset the offset of the Tegra shader instruction.
@@ -283,11 +332,33 @@ private:
* + 1. If the current instruction always terminates the program, returns PROGRAM_END.
*/
u32 CompileInstr(u32 offset) {
+ // Ignore sched instructions when generating code.
+ if (IsSchedInstruction(offset)) {
+ return offset + 1;
+ }
+
const Instruction instr = {program_code[offset]};
+ const auto opcode = OpCode::Decode(instr);
+
+ // Decoding failure
+ if (!opcode) {
+ NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value);
+ UNREACHABLE();
+ }
+
+ shader.AddLine("// " + std::to_string(offset) + ": " + opcode->GetName());
- shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name);
+ using Tegra::Shader::Pred;
+ ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
+ "NeverExecute predicate not implemented");
- switch (OpCode::GetInfo(instr.opcode).type) {
+ if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
+ shader.AddLine("if (" + GetPredicateCondition(instr) + ')');
+ shader.AddLine('{');
+ ++shader.scope;
+ }
+
+ switch (opcode->GetType()) {
case OpCode::Type::Arithmetic: {
std::string dest = GetRegister(instr.gpr0);
std::string op_a = instr.alu.negate_a ? "-" : "";
@@ -312,7 +383,7 @@ private:
op_b = "abs(" + op_b + ")";
}
- switch (instr.opcode.EffectiveOpCode()) {
+ switch (opcode->GetId()) {
case OpCode::Id::FMUL_C:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
@@ -354,16 +425,18 @@ private:
SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d);
break;
default:
- NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}",
+ NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
static_cast<unsigned>(instr.sub_op.Value()));
UNREACHABLE();
}
break;
}
+ case OpCode::Id::RRO: {
+ NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction");
+ break;
+ }
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}",
- static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
- OpCode::GetInfo(instr.opcode).name, instr.hex);
+ NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
@@ -375,7 +448,7 @@ private:
std::string op_b = instr.ffma.negate_b ? "-" : "";
std::string op_c = instr.ffma.negate_c ? "-" : "";
- switch (instr.opcode.EffectiveOpCode()) {
+ switch (opcode->GetId()) {
case OpCode::Id::FFMA_CR: {
op_b += GetUniform(instr.uniform);
op_c += GetRegister(instr.gpr39);
@@ -397,9 +470,7 @@ private:
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}",
- static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
- OpCode::GetInfo(instr.opcode).name, instr.hex);
+ NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
@@ -411,7 +482,7 @@ private:
std::string gpr0 = GetRegister(instr.gpr0);
const Attribute::Index attribute = instr.attribute.fmt20.index;
- switch (instr.opcode.EffectiveOpCode()) {
+ switch (opcode->GetId()) {
case OpCode::Id::LD_A: {
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
@@ -442,22 +513,76 @@ private:
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}",
- static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
- OpCode::GetInfo(instr.opcode).name, instr.hex);
+ NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
break;
}
+ case OpCode::Type::FloatPredicate: {
+ std::string op_a = instr.fsetp.neg_a ? "-" : "";
+ op_a += GetRegister(instr.gpr8);
+
+ if (instr.fsetp.abs_a) {
+ op_a = "abs(" + op_a + ')';
+ }
+
+ std::string op_b{};
+
+ if (instr.is_b_imm) {
+ if (instr.fsetp.neg_b) {
+ // Only the immediate version of fsetp has a neg_b bit.
+ op_b += '-';
+ }
+ op_b += '(' + GetImmediate19(instr) + ')';
+ } else {
+ if (instr.is_b_gpr) {
+ op_b += GetRegister(instr.gpr20);
+ } else {
+ op_b += GetUniform(instr.uniform);
+ }
+ }
+
+ if (instr.fsetp.abs_b) {
+ op_b = "abs(" + op_b + ')';
+ }
+
+ using Tegra::Shader::Pred;
+ ASSERT_MSG(instr.fsetp.pred0 == static_cast<u64>(Pred::UnusedIndex) &&
+ instr.fsetp.pred39 == static_cast<u64>(Pred::UnusedIndex),
+ "Compound predicates are not implemented");
+
+ // We can't use the constant predicate as destination.
+ ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+ using Tegra::Shader::PredCondition;
+ switch (instr.fsetp.cond) {
+ case PredCondition::LessThan:
+ SetPredicate(instr.fsetp.pred3, '(' + op_a + ") < (" + op_b + ')');
+ break;
+ case PredCondition::Equal:
+ SetPredicate(instr.fsetp.pred3, '(' + op_a + ") == (" + op_b + ')');
+ break;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unhandled predicate condition: {} (a: {}, b: {})",
+ static_cast<unsigned>(instr.fsetp.cond.Value()), op_a, op_b);
+ UNREACHABLE();
+ }
+ break;
+ }
default: {
- switch (instr.opcode.EffectiveOpCode()) {
+ switch (opcode->GetId()) {
case OpCode::Id::EXIT: {
+ ASSERT_MSG(instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex),
+ "Predicated exits not implemented");
shader.AddLine("return true;");
offset = PROGRAM_END - 1;
break;
}
+ case OpCode::Id::KIL: {
+ shader.AddLine("discard;");
+ break;
+ }
case OpCode::Id::IPA: {
const auto& attribute = instr.attribute.fmt28;
std::string dest = GetRegister(instr.gpr0);
@@ -465,9 +590,7 @@ private:
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}",
- static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
- OpCode::GetInfo(instr.opcode).name, instr.hex);
+ NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
@@ -476,6 +599,12 @@ private:
}
}
+ // Close the predicate condition scope.
+ if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
return offset + 1;
}
@@ -605,6 +734,12 @@ private:
declarations.AddNewLine();
++const_buffer_layout;
}
+
+ declarations.AddNewLine();
+ for (const auto& pred : declr_predicates) {
+ declarations.AddLine("bool " + pred + " = false;");
+ }
+ declarations.AddNewLine();
}
private:
@@ -618,6 +753,7 @@ private:
// Declarations
std::set<std::string> declr_register;
+ std::set<std::string> declr_predicates;
std::set<Attribute::Index> declr_input_attribute;
std::set<Attribute::Index> declr_output_attribute;
std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 5e78723a2..ab0acb20a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -9,13 +9,10 @@
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
-#include "common/bit_field.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
-#include "core/hw/hw.h"
-#include "core/hw/lcd.h"
#include "core/memory.h"
#include "core/settings.h"
#include "core/tracer/recorder.h"
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 4df687786..e0509f0ce 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -53,6 +53,7 @@ u32 BytesPerPixel(TextureFormat format) {
// In this case a 'pixel' actually refers to a 4x4 tile.
return 16;
case TextureFormat::A8R8G8B8:
+ case TextureFormat::A2B10G10R10:
return 4;
case TextureFormat::B5G6R5:
return 2;
@@ -78,6 +79,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
unswizzled_data.data(), true, block_height);
break;
case TextureFormat::A8R8G8B8:
+ case TextureFormat::A2B10G10R10:
case TextureFormat::B5G6R5:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
@@ -100,6 +102,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::DXT23:
case TextureFormat::DXT45:
case TextureFormat::A8R8G8B8:
+ case TextureFormat::A2B10G10R10:
case TextureFormat::B5G6R5:
// TODO(Subv): For the time being just forward the same data without any decoding.
rgba_data = texture_data;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 86e45aa88..dc004d361 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -15,6 +15,7 @@ namespace Texture {
enum class TextureFormat : u32 {
A8R8G8B8 = 0x8,
+ A2B10G10R10 = 0x9,
B5G6R5 = 0x15,
DXT1 = 0x24,
DXT23 = 0x25,