aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/command_processor.cpp170
-rw-r--r--src/video_core/command_processor.h17
-rw-r--r--src/video_core/engines/maxwell_3d.cpp6
-rw-r--r--src/video_core/engines/maxwell_3d.h22
-rw-r--r--src/video_core/engines/maxwell_dma.cpp12
-rw-r--r--src/video_core/engines/shader_bytecode.h115
-rw-r--r--src/video_core/gpu.cpp1
-rw-r--r--src/video_core/gpu.h8
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_base.h1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp207
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h23
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp472
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp445
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h50
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_state.h8
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp2
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp26
-rw-r--r--src/video_core/textures/texture.h12
25 files changed, 1110 insertions, 582 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d5831e752..2625ddfdc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -28,98 +28,106 @@ enum class BufferMethods {
CountBufferMethods = 0x40,
};
-void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
- LOG_TRACE(HW_GPU,
- "Processing method {:08X} on subchannel {} value "
- "{:08X} remaining params {}",
- method, subchannel, value, remaining_params);
-
- ASSERT(subchannel < bound_engines.size());
-
- if (method == static_cast<u32>(BufferMethods::BindObject)) {
- // Bind the current subchannel to the desired engine id.
- LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
- bound_engines[subchannel] = static_cast<EngineID>(value);
- return;
- }
+MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
- if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
- // TODO(Subv): Research and implement these methods.
- LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
- return;
- }
+void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
+ MICROPROFILE_SCOPE(ProcessCommandLists);
- const EngineID engine = bound_engines[subchannel];
-
- switch (engine) {
- case EngineID::FERMI_TWOD_A:
- fermi_2d->WriteReg(method, value);
- break;
- case EngineID::MAXWELL_B:
- maxwell_3d->WriteReg(method, value, remaining_params);
- break;
- case EngineID::MAXWELL_COMPUTE_B:
- maxwell_compute->WriteReg(method, value);
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- maxwell_dma->WriteReg(method, value);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine");
- }
-}
+ auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
+ LOG_TRACE(HW_GPU,
+ "Processing method {:08X} on subchannel {} value "
+ "{:08X} remaining params {}",
+ method, subchannel, value, remaining_params);
-void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
- const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
- VAddr current_addr = *head_address;
- while (current_addr < *head_address + size * sizeof(CommandHeader)) {
- const CommandHeader header = {Memory::Read32(current_addr)};
- current_addr += sizeof(u32);
-
- switch (header.mode.Value()) {
- case SubmissionMode::IncreasingOld:
- case SubmissionMode::Increasing: {
- // Increase the method value with each argument.
- for (unsigned i = 0; i < header.arg_count; ++i) {
- WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
- current_addr += sizeof(u32);
- }
- break;
+ ASSERT(subchannel < bound_engines.size());
+
+ if (method == static_cast<u32>(BufferMethods::BindObject)) {
+ // Bind the current subchannel to the desired engine id.
+ LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
+ bound_engines[subchannel] = static_cast<EngineID>(value);
+ return;
}
- case SubmissionMode::NonIncreasingOld:
- case SubmissionMode::NonIncreasing: {
- // Use the same method value for all arguments.
- for (unsigned i = 0; i < header.arg_count; ++i) {
- WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
- current_addr += sizeof(u32);
- }
+
+ if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+ // TODO(Subv): Research and implement these methods.
+ LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+ return;
+ }
+
+ const EngineID engine = bound_engines[subchannel];
+
+ switch (engine) {
+ case EngineID::FERMI_TWOD_A:
+ fermi_2d->WriteReg(method, value);
+ break;
+ case EngineID::MAXWELL_B:
+ maxwell_3d->WriteReg(method, value, remaining_params);
break;
+ case EngineID::MAXWELL_COMPUTE_B:
+ maxwell_compute->WriteReg(method, value);
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ maxwell_dma->WriteReg(method, value);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine");
}
- case SubmissionMode::IncreaseOnce: {
- ASSERT(header.arg_count.Value() >= 1);
+ };
- // Use the original method for the first argument and then the next method for all other
- // arguments.
- WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - 1);
+ for (auto entry : commands) {
+ Tegra::GPUVAddr address = entry.Address();
+ u32 size = entry.sz;
+ const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
+ VAddr current_addr = *head_address;
+ while (current_addr < *head_address + size * sizeof(CommandHeader)) {
+ const CommandHeader header = {Memory::Read32(current_addr)};
current_addr += sizeof(u32);
- for (unsigned i = 1; i < header.arg_count; ++i) {
- WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
+ switch (header.mode.Value()) {
+ case SubmissionMode::IncreasingOld:
+ case SubmissionMode::Increasing: {
+ // Increase the method value with each argument.
+ for (unsigned i = 0; i < header.arg_count; ++i) {
+ WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
+ header.arg_count - i - 1);
+ current_addr += sizeof(u32);
+ }
+ break;
+ }
+ case SubmissionMode::NonIncreasingOld:
+ case SubmissionMode::NonIncreasing: {
+ // Use the same method value for all arguments.
+ for (unsigned i = 0; i < header.arg_count; ++i) {
+ WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+ header.arg_count - i - 1);
+ current_addr += sizeof(u32);
+ }
+ break;
+ }
+ case SubmissionMode::IncreaseOnce: {
+ ASSERT(header.arg_count.Value() >= 1);
+
+ // Use the original method for the first argument and then the next method for all
+ // other arguments.
+ WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+ header.arg_count - 1);
current_addr += sizeof(u32);
+
+ for (unsigned i = 1; i < header.arg_count; ++i) {
+ WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
+ header.arg_count - i - 1);
+ current_addr += sizeof(u32);
+ }
+ break;
+ }
+ case SubmissionMode::Inline: {
+ // The register value is stored in the bits 16-28 as an immediate
+ WriteReg(header.method, header.subchannel, header.inline_data, 0);
+ break;
+ }
+ default:
+ UNIMPLEMENTED();
}
- break;
- }
- case SubmissionMode::Inline: {
- // The register value is stored in the bits 16-28 as an immediate
- WriteReg(header.method, header.subchannel, header.inline_data, 0);
- break;
- }
- default:
- UNIMPLEMENTED();
}
}
}
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index a01153e0b..bd766e77a 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -7,6 +7,7 @@
#include <type_traits>
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "video_core/memory_manager.h"
namespace Tegra {
@@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
IncreaseOnce = 5
};
+struct CommandListHeader {
+ u32 entry0; // gpu_va_lo
+ union {
+ u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
+ BitField<0, 8, u32> gpu_va_hi;
+ BitField<8, 2, u32> unk1;
+ BitField<10, 21, u32> sz;
+ BitField<31, 1, u32> unk2;
+ };
+
+ GPUVAddr Address() const {
+ return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
+ }
+};
+static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
+
union CommandHeader {
u32 hex;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index e63ad4d46..329079ddd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -135,8 +135,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
break;
}
- rasterizer.NotifyMaxwellRegisterChanged(method);
-
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
}
@@ -293,10 +291,6 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
- ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
- (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
- "Texture types other than Texture2D are unimplemented");
-
auto r_type = tic_entry.r_type.Value();
auto g_type = tic_entry.g_type.Value();
auto b_type = tic_entry.b_type.Value();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index f59d01738..d3be900a4 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -533,7 +533,11 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_PADDING_WORDS(0x20);
+ INSERT_PADDING_WORDS(0x13);
+
+ u32 rt_separate_frag_data;
+
+ INSERT_PADDING_WORDS(0xC);
struct {
u32 address_high;
@@ -557,7 +561,22 @@ public:
struct {
union {
BitField<0, 4, u32> count;
+ BitField<4, 3, u32> map_0;
+ BitField<7, 3, u32> map_1;
+ BitField<10, 3, u32> map_2;
+ BitField<13, 3, u32> map_3;
+ BitField<16, 3, u32> map_4;
+ BitField<19, 3, u32> map_5;
+ BitField<22, 3, u32> map_6;
+ BitField<25, 3, u32> map_7;
};
+
+ u32 GetMap(size_t index) const {
+ const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
+ map_4, map_5, map_6, map_7};
+ ASSERT(index < maps.size());
+ return maps[index];
+ }
} rt_control;
INSERT_PADDING_WORDS(0x2);
@@ -968,6 +987,7 @@ ASSERT_REG_POSITION(clear_stencil, 0x368);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 6e740713f..c24d33d5c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -41,7 +41,6 @@ void MaxwellDMA::HandleCopy() {
// TODO(Subv): Perform more research and implement all features of this engine.
ASSERT(regs.exec.enable_swizzle == 0);
- ASSERT(regs.exec.enable_2d == 1);
ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
@@ -51,10 +50,19 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.dst_params.pos_y == 0);
if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
- Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count * regs.y_count);
+ size_t copy_size = regs.x_count;
+
+ // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
+ // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
+ if (regs.exec.enable_2d) {
+ copy_size = copy_size * regs.y_count;
+ }
+
+ Memory::CopyBlock(dest_cpu, source_cpu, copy_size);
return;
}
+ ASSERT(regs.exec.enable_2d == 1);
u8* src_buffer = Memory::GetPointer(source_cpu);
u8* dst_buffer = Memory::GetPointer(dest_cpu);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d2388673e..58f2904ce 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -244,6 +244,25 @@ enum class TextureType : u64 {
TextureCube = 3,
};
+enum class TextureQueryType : u64 {
+ Dimension = 1,
+ TextureType = 2,
+ SamplePosition = 5,
+ Filter = 16,
+ LevelOfDetail = 18,
+ Wrap = 20,
+ BorderColor = 22,
+};
+
+enum class TextureProcessMode : u64 {
+ None = 0,
+ LZ = 1, // Unknown, appears to be the same as none.
+ LB = 2, // Load Bias.
+ LL = 3, // Load LOD (LevelOfDetail)
+ LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
+ LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
+};
+
enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
@@ -414,6 +433,45 @@ union Instruction {
} bfe;
union {
+ BitField<48, 3, u64> pred48;
+
+ union {
+ BitField<20, 20, u64> entry_a;
+ BitField<39, 5, u64> entry_b;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } imm;
+
+ union {
+ BitField<20, 14, u64> cb_index;
+ BitField<34, 5, u64> cb_offset;
+ BitField<56, 1, u64> neg;
+ BitField<57, 1, u64> uses_cc;
+ } hi;
+
+ union {
+ BitField<20, 14, u64> cb_index;
+ BitField<34, 5, u64> cb_offset;
+ BitField<39, 5, u64> entry_a;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } rz;
+
+ union {
+ BitField<39, 5, u64> entry_a;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } r1;
+
+ union {
+ BitField<28, 8, u64> entry_a;
+ BitField<37, 1, u64> neg;
+ BitField<38, 1, u64> uses_cc;
+ } r2;
+
+ } lea;
+
+ union {
BitField<0, 5, FlowCondition> cond;
} flow;
@@ -468,6 +526,18 @@ union Instruction {
} psetp;
union {
+ BitField<12, 3, u64> pred12;
+ BitField<15, 1, u64> neg_pred12;
+ BitField<24, 2, PredOperation> cond;
+ BitField<29, 3, u64> pred29;
+ BitField<32, 1, u64> neg_pred29;
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred39;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, PredOperation> op;
+ } pset;
+
+ union {
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
BitField<43, 1, u64> neg_a;
@@ -512,6 +582,7 @@ union Instruction {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
BitField<31, 4, u64> component_mask;
+ BitField<55, 3, TextureProcessMode> process_mode;
bool IsComponentEnabled(size_t component) const {
return ((1ull << component) & component_mask) != 0;
@@ -519,6 +590,21 @@ union Instruction {
} tex;
union {
+ BitField<22, 6, TextureQueryType> query_type;
+ BitField<31, 4, u64> component_mask;
+ } txq;
+
+ union {
+ BitField<28, 1, u64> array;
+ BitField<29, 2, TextureType> texture_type;
+ BitField<31, 4, u64> component_mask;
+
+ bool IsComponentEnabled(size_t component) const {
+ return ((1ull << component) & component_mask) != 0;
+ }
+ } tmml;
+
+ union {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
BitField<56, 2, u64> component;
@@ -670,11 +756,13 @@ public:
LDG, // Load from global memory
STG, // Store in global memory
TEX,
- TEXQ, // Texture Query
- TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
- TLDS, // Texture Load with scalar/non-vec4 source/destinations
- TLD4, // Texture Load 4
- TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
+ TXQ, // Texture Query
+ TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
+ TLDS, // Texture Load with scalar/non-vec4 source/destinations
+ TLD4, // Texture Load 4
+ TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
+ TMML_B, // Texture Mip Map Level
+ TMML, // Texture Mip Map Level
EXIT,
IPA,
FFMA_IMM, // Fused Multiply and Add
@@ -699,6 +787,11 @@ public:
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
+ LEA_R1,
+ LEA_R2,
+ LEA_RZ,
+ LEA_IMM,
+ LEA_HI,
POPC_C,
POPC_R,
POPC_IMM,
@@ -757,6 +850,7 @@ public:
ISET_C,
ISET_IMM,
PSETP,
+ PSET,
XMAD_IMM,
XMAD_CR,
XMAD_RC,
@@ -780,6 +874,7 @@ public:
IntegerSet,
IntegerSetPredicate,
PredicateSetPredicate,
+ PredicateSetRegister,
Conversion,
Xmad,
Unknown,
@@ -894,11 +989,13 @@ private:
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
- INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
+ INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
+ INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
+ INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
@@ -929,6 +1026,11 @@ private:
INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
+ INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
+ INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
+ INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
+ INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
+ INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -983,6 +1085,7 @@ private:
INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
+ INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e6d8e65c6..86a809f86 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,6 +66,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
case RenderTargetFormat::RGBA8_UINT:
case RenderTargetFormat::RGB10_A2_UNORM:
case RenderTargetFormat::BGRA8_UNORM:
+ case RenderTargetFormat::BGRA8_SRGB:
case RenderTargetFormat::RG16_UNORM:
case RenderTargetFormat::RG16_SNORM:
case RenderTargetFormat::RG16_UINT:
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d29f31f52..589a59b4f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
#include <array>
#include <memory>
+#include <vector>
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/memory_manager.h"
@@ -26,6 +27,7 @@ enum class RenderTargetFormat : u32 {
RG32_FLOAT = 0xCB,
RG32_UINT = 0xCD,
BGRA8_UNORM = 0xCF,
+ BGRA8_SRGB = 0xD0,
RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
RGBA8_SRGB = 0xD6,
@@ -67,6 +69,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
/// Returns the number of bytes per pixel of each depth format.
u32 DepthFormatBytesPerPixel(DepthFormat format);
+struct CommandListHeader;
class DebugContext;
/**
@@ -115,7 +118,7 @@ public:
~GPU();
/// Processes a command list stored at the specified address in GPU memory.
- void ProcessCommandList(GPUVAddr address, u32 size);
+ void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();
@@ -130,9 +133,6 @@ public:
const Tegra::MemoryManager& MemoryManager() const;
private:
- /// Writes a single register in the engine bound to the specified subchannel
- void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
-
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids.
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 9d78e8b6b..cd819d69f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -20,9 +20,6 @@ public:
/// Clear the current framebuffer
virtual void Clear() = 0;
- /// Notify rasterizer that the specified Maxwell register has been changed
- virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
-
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index be17a2b9c..0df3725c2 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -19,6 +19,7 @@ void RendererBase::RefreshBaseSettings() {
UpdateCurrentFramebufferLayout();
renderer_settings.use_framelimiter = Settings::values.use_frame_limit;
+ renderer_settings.set_background_color = true;
}
void RendererBase::UpdateCurrentFramebufferLayout() {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 2a357f9d0..2cd0738ff 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -19,6 +19,7 @@ namespace VideoCore {
struct RendererSettings {
std::atomic_bool use_framelimiter{false};
+ std::atomic_bool set_background_color{false};
};
class RendererBase : NonCopyable {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0c3bbc475..c59f3af1b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -294,61 +294,80 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
cached_pages.add({pages_interval, delta});
}
-std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
- bool using_depth_fb,
- bool preserve_contents) {
+void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
+ bool preserve_contents,
+ boost::optional<size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
- LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
- using_color_fb = false;
+ Surface depth_surface;
+ if (using_depth_fb) {
+ depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
}
- const bool has_stencil = regs.stencil_enable;
- const bool write_color_fb =
- state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
- state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
-
- const bool write_depth_fb =
- (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
- (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask));
+ // TODO(bunnei): Figure out how the below register works. According to envytools, this should be
+ // used to enable multiple render targets. However, it is left unset on all games that I have
+ // tested.
+ ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
- Surface color_surface;
- Surface depth_surface;
- MathUtil::Rectangle<u32> surfaces_rect;
- std::tie(color_surface, depth_surface, surfaces_rect) =
- res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents);
+ // Bind the framebuffer surfaces
+ state.draw.draw_framebuffer = framebuffer.handle;
+ state.Apply();
- const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
- const MathUtil::Rectangle<u32> draw_rect{
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
- surfaces_rect.left, surfaces_rect.right)), // Left
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
- surfaces_rect.bottom, surfaces_rect.top)), // Top
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
- surfaces_rect.left, surfaces_rect.right)), // Right
- static_cast<u32>(
- std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
- surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+ if (using_color_fb) {
+ if (single_color_target) {
+ // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
+ Surface color_surface =
+ res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
+ glFramebufferTexture2D(
+ GL_DRAW_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
+ color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
+ glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
+ } else {
+ // Multiple color attachments are enabled
+ std::array<GLenum, Maxwell::NumRenderTargets> buffers;
+ for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
+ buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+ glFramebufferTexture2D(
+ GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+ GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
+ 0);
+ }
+ glDrawBuffers(regs.rt_control.count, buffers.data());
+ }
+ } else {
+ // No color attachments are enabled - zero out all of them
+ for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
+ 0, 0);
+ }
+ glDrawBuffer(GL_NONE);
+ }
- // Bind the framebuffer surfaces
- BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
+ if (depth_surface) {
+ if (regs.stencil_enable) {
+ // Attach both depth and stencil
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ depth_surface->Texture().handle, 0);
+ } else {
+ // Attach depth
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+ depth_surface->Texture().handle, 0);
+ // Clear stencil attachment
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ }
+ } else {
+ // Clear both depth and stencil attachment
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+ }
- SyncViewport(surfaces_rect);
+ SyncViewport();
- // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
- // scissor test to prevent drawing outside of the framebuffer region
- state.scissor.enabled = true;
- state.scissor.x = draw_rect.left;
- state.scissor.y = draw_rect.bottom;
- state.scissor.width = draw_rect.GetWidth();
- state.scissor.height = draw_rect.GetHeight();
state.Apply();
-
- // Only return the surface to be marked as dirty if writing to it is enabled.
- return std::make_pair(write_color_fb ? color_surface : nullptr,
- write_depth_fb ? depth_surface : nullptr);
}
void RasterizerOpenGL::Clear() {
@@ -356,8 +375,9 @@ void RasterizerOpenGL::Clear() {
SCOPE_EXIT({ prev_state.Apply(); });
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- bool use_color_fb = false;
- bool use_depth_fb = false;
+ bool use_color{};
+ bool use_depth{};
+ bool use_stencil{};
OpenGLState clear_state;
clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer;
@@ -366,22 +386,13 @@ void RasterizerOpenGL::Clear() {
clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
- GLbitfield clear_mask{};
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
- if (regs.clear_buffers.RT == 0) {
- // We only support clearing the first color attachment for now
- clear_mask |= GL_COLOR_BUFFER_BIT;
- use_color_fb = true;
- } else {
- // TODO(subv): Add support for the other color attachments
- LOG_CRITICAL(HW_GPU, "Clear unimplemented for RT {}", regs.clear_buffers.RT);
- }
+ use_color = true;
}
if (regs.clear_buffers.Z) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
- use_depth_fb = true;
- clear_mask |= GL_DEPTH_BUFFER_BIT;
+ use_depth = true;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@@ -390,34 +401,33 @@ void RasterizerOpenGL::Clear() {
}
if (regs.clear_buffers.S) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
- use_depth_fb = true;
- clear_mask |= GL_STENCIL_BUFFER_BIT;
+ use_stencil = true;
clear_state.stencil.test_enabled = true;
}
- if (!use_color_fb && !use_depth_fb) {
+ if (!use_color && !use_depth && !use_stencil) {
// No color surface nor depth/stencil surface are enabled
return;
}
- if (clear_mask == 0) {
- // No clear mask is enabled
- return;
- }
-
ScopeAcquireGLContext acquire_context{emu_window};
- auto [dirty_color_surface, dirty_depth_surface] =
- ConfigureFramebuffers(use_color_fb, use_depth_fb, false);
+ ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
+ regs.clear_buffers.RT.Value());
clear_state.Apply();
- glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
- regs.clear_color[3]);
- glClearDepth(regs.clear_depth);
- glClearStencil(regs.clear_stencil);
+ if (use_color) {
+ glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+ }
- glClear(clear_mask);
+ if (use_depth && use_stencil) {
+ glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
+ } else if (use_depth) {
+ glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
+ } else if (use_stencil) {
+ glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
+ }
}
void RasterizerOpenGL::DrawArrays() {
@@ -430,8 +440,7 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context{emu_window};
- const auto [dirty_color_surface, dirty_depth_surface] =
- ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true);
+ ConfigureFramebuffers();
SyncDepthTestState();
SyncStencilTestState();
@@ -525,8 +534,6 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
}
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
-
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
@@ -586,7 +593,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
void RasterizerOpenGL::SamplerInfo::Create() {
sampler.Create();
mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear;
- wrap_u = wrap_v = Tegra::Texture::WrapMode::Wrap;
+ wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap;
// default is GL_LINEAR_MIPMAP_LINEAR
glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@@ -613,8 +620,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
wrap_v = config.wrap_v;
glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
}
+ if (wrap_p != config.wrap_p) {
+ wrap_p = config.wrap_p;
+ glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
+ }
- if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border) {
+ if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border ||
+ wrap_p == Tegra::Texture::WrapMode::Border) {
const GLvec4 new_border_color = {{config.border_color_r, config.border_color_g,
config.border_color_b, config.border_color_a}};
if (border_color != new_border_color) {
@@ -698,14 +710,15 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
if (!texture.enabled) {
- state.texture_units[current_bindpoint].texture_2d = 0;
+ state.texture_units[current_bindpoint].texture = 0;
continue;
}
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) {
- state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;
+ state.texture_units[current_bindpoint].texture = surface->Texture().handle;
+ state.texture_units[current_bindpoint].target = surface->Target();
state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g =
@@ -716,45 +729,19 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
MaxwellToGL::SwizzleSource(texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
- state.texture_units[current_bindpoint].texture_2d = 0;
+ state.texture_units[current_bindpoint].texture = 0;
}
}
return current_unit + static_cast<u32>(entries.size());
}
-void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
- const Surface& depth_surface, bool has_stencil) {
- state.draw.draw_framebuffer = framebuffer.handle;
- state.Apply();
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
- if (depth_surface != nullptr) {
- if (has_stencil) {
- // attach both depth and stencil
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- } else {
- // attach depth
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- // clear stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- }
- } else {
- // clear both depth and stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
- }
-}
-
-void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
+void RasterizerOpenGL::SyncViewport() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
- state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
- state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
+ state.viewport.x = viewport_rect.left;
+ state.viewport.y = viewport_rect.bottom;
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9c30dc0e8..745c3dc0c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -13,6 +13,7 @@
#include <vector>
#include <boost/icl/interval_map.hpp>
+#include <boost/optional.hpp>
#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
@@ -45,7 +46,6 @@ public:
void DrawArrays() override;
void Clear() override;
- void NotifyMaxwellRegisterChanged(u32 method) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
@@ -93,17 +93,20 @@ private:
Tegra::Texture::TextureFilter min_filter;
Tegra::Texture::WrapMode wrap_u;
Tegra::Texture::WrapMode wrap_v;
+ Tegra::Texture::WrapMode wrap_p;
GLvec4 border_color;
};
- /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth>
- /// surfaces if writing was enabled.
- std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
- bool preserve_contents);
-
- /// Binds the framebuffer color and depth surface
- void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
- bool has_stencil);
+ /**
+ * Configures the color and depth framebuffer states.
+ * @param use_color_fb If true, configure color framebuffers.
+ * @param using_depth_fb If true, configure the depth/stencil framebuffer.
+ * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
+ * @param single_color_target Specifies if a single color buffer target should be used.
+ */
+ void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
+ bool preserve_contents = true,
+ boost::optional<size_t> single_color_target = {});
/*
* Configures the current constbuffers to use for the draw command.
@@ -126,7 +129,7 @@ private:
u32 current_unit);
/// Syncs the viewport to match the guest state
- void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
+ void SyncViewport();
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index f6b2c5a86..32001e44b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -7,6 +7,7 @@
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
@@ -52,14 +53,30 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
params.unaligned_height = config.tic.Height();
+ params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+
+ switch (params.target) {
+ case SurfaceTarget::Texture1D:
+ case SurfaceTarget::Texture2D:
+ params.depth = 1;
+ break;
+ case SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture2DArray:
+ params.depth = config.tic.Depth();
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
+ UNREACHABLE();
+ params.depth = 1;
+ break;
+ }
+
params.size_in_bytes = params.SizeInBytes();
- params.cache_width = Common::AlignUp(params.width, 16);
- params.cache_height = Common::AlignUp(params.height, 16);
return params;
}
-/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
- const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) {
+ const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.Address());
params.is_tiled = true;
@@ -70,9 +87,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.width = config.width;
params.height = config.height;
params.unaligned_height = config.height;
+ params.target = SurfaceTarget::Texture2D;
+ params.depth = 1;
params.size_in_bytes = params.SizeInBytes();
- params.cache_width = Common::AlignUp(params.width, 16);
- params.cache_height = Common::AlignUp(params.height, 16);
return params;
}
@@ -86,13 +103,12 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
- params.size_in_bytes = params.SizeInBytes();
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
+ params.target = SurfaceTarget::Texture2D;
+ params.depth = 1;
params.size_in_bytes = params.SizeInBytes();
- params.cache_width = Common::AlignUp(params.width, 16);
- params.cache_height = Common::AlignUp(params.height, 16);
return params;
}
@@ -100,7 +116,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
{GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
- {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
+ {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
false}, // A2B10G10R10U
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
@@ -166,6 +182,26 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
ComponentType::Float, false}, // Z32FS8
}};
+static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
+ switch (target) {
+ case SurfaceParams::SurfaceTarget::Texture1D:
+ return GL_TEXTURE_1D;
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ return GL_TEXTURE_2D;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ return GL_TEXTURE_3D;
+ case SurfaceParams::SurfaceTarget::Texture1DArray:
+ return GL_TEXTURE_1D_ARRAY;
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ return GL_TEXTURE_2D_ARRAY;
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
+ return GL_TEXTURE_CUBE_MAP;
+ }
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
+ UNREACHABLE();
+ return {};
+}
+
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
@@ -220,7 +256,8 @@ static bool IsFormatBCn(PixelFormat format) {
}
template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, VAddr addr) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size,
+ VAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
@@ -230,18 +267,18 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
addr, tile_size, bytes_per_pixel, stride, height, block_height);
- const size_t size_to_copy{std::min(gl_buffer.size(), data.size())};
- gl_buffer.assign(data.begin(), data.begin() + size_to_copy);
+ const size_t size_to_copy{std::min(gl_buffer_size, data.size())};
+ memcpy(gl_buffer, data.data(), size_to_copy);
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
// check the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
- Memory::GetPointer(addr), gl_buffer.data(), morton_to_gl);
+ Memory::GetPointer(addr), gl_buffer, morton_to_gl);
}
}
-static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
// clang-format off
@@ -298,7 +335,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
// clang-format on
};
-static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
// clang-format off
@@ -357,33 +394,6 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
// clang-format on
};
-// Allocate an uninitialized texture of appropriate size and format for the surface
-static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width,
- u32 height) {
- OpenGLState cur_state = OpenGLState::GetCurState();
-
- // Keep track of previous texture bindings
- GLuint old_tex = cur_state.texture_units[0].texture_2d;
- cur_state.texture_units[0].texture_2d = texture;
- cur_state.Apply();
- glActiveTexture(GL_TEXTURE0);
-
- if (!format_tuple.compressed) {
- // Only pre-create the texture for non-compressed textures.
- glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
- format_tuple.format, format_tuple.type, nullptr);
- }
-
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-
- // Restore previous texture bindings
- cur_state.texture_units[0].texture_2d = old_tex;
- cur_state.Apply();
-}
-
static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
GLuint read_fb_handle, GLuint draw_fb_handle) {
@@ -438,12 +448,53 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
return true;
}
-CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
+CachedSurface::CachedSurface(const SurfaceParams& params)
+ : params(params), gl_target(SurfaceTargetToGL(params.target)) {
texture.Create();
const auto& rect{params.GetRect()};
- AllocateSurfaceTexture(texture.handle,
- GetFormatTuple(params.pixel_format, params.component_type),
+
+ // Keep track of previous texture bindings
+ OpenGLState cur_state = OpenGLState::GetCurState();
+ const auto& old_tex = cur_state.texture_units[0];
+ SCOPE_EXIT({
+ cur_state.texture_units[0] = old_tex;
+ cur_state.Apply();
+ });
+
+ cur_state.texture_units[0].texture = texture.handle;
+ cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
+ cur_state.Apply();
+ glActiveTexture(GL_TEXTURE0);
+
+ const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
+ if (!format_tuple.compressed) {
+ // Only pre-create the texture for non-compressed textures.
+ switch (params.target) {
+ case SurfaceParams::SurfaceTarget::Texture1D:
+ glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+ rect.GetWidth());
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight());
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+ rect.GetWidth(), rect.GetHeight(), params.depth);
+ break;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
+ rect.GetHeight());
+ }
+ }
+
+ glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
@@ -461,7 +512,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
S8Z24 input_pixel{};
Z24S8 output_pixel{};
- const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
+ constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
for (size_t y = 0; y < height; ++y) {
for (size_t x = 0; x < width; ++x) {
const size_t offset{bpp * (y * width + x)};
@@ -474,7 +525,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
}
static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
- const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
+ constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
for (size_t y = 0; y < height; ++y) {
for (size_t x = 0; x < width; ++x) {
const size_t offset{bpp * (y * width + x)};
@@ -514,23 +565,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
}
}
-/**
- * Helper function to perform software conversion (as needed) when flushing a buffer to Switch
- * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
- * typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& /*data*/, PixelFormat pixel_format,
- u32 /*width*/, u32 /*height*/) {
- switch (pixel_format) {
- case PixelFormat::ASTC_2D_4X4:
- case PixelFormat::S8Z24:
- LOG_CRITICAL(Render_OpenGL, "Unimplemented pixel_format={}",
- static_cast<u32>(pixel_format));
- UNREACHABLE();
- break;
- }
-}
-
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
@@ -545,13 +579,25 @@ void CachedSurface::LoadGLBuffer() {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
if (params.is_tiled) {
- gl_buffer.resize(copy_size);
+ // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
+ // this for 3D textures, etc.
+ switch (params.target) {
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ // Pass impl. to the fallback code below
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ }
+ gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size);
morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, gl_buffer, params.addr);
+ params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
+ params.addr);
} else {
- const u8* const texture_src_data_end = texture_src_data + copy_size;
-
+ const u8* const texture_src_data_end{texture_src_data +
+ (static_cast<size_t>(params.depth) * copy_size)};
gl_buffer.assign(texture_src_data, texture_src_data_end);
}
@@ -560,23 +606,7 @@ void CachedSurface::LoadGLBuffer() {
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer() {
- u8* const dst_buffer = Memory::GetPointer(params.addr);
-
- ASSERT(dst_buffer);
- ASSERT(gl_buffer.size() ==
- params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
-
- MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
-
- ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
- params.height);
-
- if (!params.is_tiled) {
- std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
- } else {
- gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, gl_buffer, params.addr);
- }
+ ASSERT_MSG(false, "Unimplemented");
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
@@ -586,22 +616,29 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
MICROPROFILE_SCOPE(OpenGL_TextureUL);
- ASSERT(gl_buffer.size() ==
- params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
+ ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height *
+ GetGLBytesPerPixel(params.pixel_format) * params.depth);
const auto& rect{params.GetRect()};
// Load data from memory to the surface
- GLint x0 = static_cast<GLint>(rect.left);
- GLint y0 = static_cast<GLint>(rect.bottom);
- size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format);
+ const GLint x0 = static_cast<GLint>(rect.left);
+ const GLint y0 = static_cast<GLint>(rect.bottom);
+ const size_t buffer_offset =
+ static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) *
+ GetGLBytesPerPixel(params.pixel_format);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- GLuint target_tex = texture.handle;
+ const GLuint target_tex = texture.handle;
OpenGLState cur_state = OpenGLState::GetCurState();
- GLuint old_tex = cur_state.texture_units[0].texture_2d;
- cur_state.texture_units[0].texture_2d = target_tex;
+ const auto& old_tex = cur_state.texture_units[0];
+ SCOPE_EXIT({
+ cur_state.texture_units[0] = old_tex;
+ cur_state.Apply();
+ });
+ cur_state.texture_units[0].texture = target_tex;
+ cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
@@ -610,136 +647,102 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
glActiveTexture(GL_TEXTURE0);
if (tuple.compressed) {
- glCompressedTexImage2D(
- GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
- static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
- &gl_buffer[buffer_offset]);
+ switch (params.target) {
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glCompressedTexImage2D(
+ SurfaceTargetToGL(params.target), 0, tuple.internal_format,
+ static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
+ static_cast<GLsizei>(params.size_in_bytes), &gl_buffer[buffer_offset]);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glCompressedTexImage3D(
+ SurfaceTargetToGL(params.target), 0, tuple.internal_format,
+ static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
+ static_cast<GLsizei>(params.depth), 0, static_cast<GLsizei>(params.size_in_bytes),
+ &gl_buffer[buffer_offset]);
+ break;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ glCompressedTexImage2D(
+ GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
+ static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
+ &gl_buffer[buffer_offset]);
+ }
} else {
- glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
- static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
- &gl_buffer[buffer_offset]);
- }
-
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-
- cur_state.texture_units[0].texture_2d = old_tex;
- cur_state.Apply();
-}
-
-MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
-void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
- if (params.type == SurfaceType::Fill)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_TextureDL);
-
- gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
-
- OpenGLState state = OpenGLState::GetCurState();
- OpenGLState prev_state = state;
- SCOPE_EXIT({ prev_state.Apply(); });
-
- const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-
- // Ensure no bad interactions with GL_PACK_ALIGNMENT
- ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
- glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
-
- const auto& rect{params.GetRect()};
- size_t buffer_offset =
- (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format);
-
- state.UnbindTexture(texture.handle);
- state.draw.read_framebuffer = read_fb_handle;
- state.Apply();
- if (params.type == SurfaceType::ColorTexture) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- texture.handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
- } else if (params.type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- texture.handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- } else {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- texture.handle, 0);
+ switch (params.target) {
+ case SurfaceParams::SurfaceTarget::Texture1D:
+ glTexSubImage1D(SurfaceTargetToGL(params.target), 0, x0,
+ static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
+ &gl_buffer[buffer_offset]);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glTexSubImage2D(SurfaceTargetToGL(params.target), 0, x0, y0,
+ static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+ &gl_buffer[buffer_offset]);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glTexSubImage3D(SurfaceTargetToGL(params.target), 0, x0, y0, 0,
+ static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
+ tuple.type, &gl_buffer[buffer_offset]);
+ break;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+ &gl_buffer[buffer_offset]);
+ }
}
- glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
- static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
- tuple.format, tuple.type, &gl_buffer[buffer_offset]);
- glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
read_framebuffer.Create();
draw_framebuffer.Create();
+ copy_pbo.Create();
}
Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
return GetSurface(SurfaceParams::CreateForTexture(config));
}
-SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb,
- bool using_depth_fb,
- bool preserve_contents) {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
+ const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
+ if (!regs.zeta.Address() || !regs.zeta_enable) {
+ return {};
+ }
- // TODO(bunnei): This is hard corded to use just the first render buffer
- LOG_TRACE(Render_OpenGL, "hard-coded for render target 0!");
+ SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
+ regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)};
- // get color and depth surfaces
- SurfaceParams color_params{};
- SurfaceParams depth_params{};
+ return GetSurface(depth_params, preserve_contents);
+}
- if (using_color_fb) {
- color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]);
- }
+Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) {
+ const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
- if (using_depth_fb) {
- depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
- regs.zeta.Address(), regs.zeta.format);
- }
+ ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
- MathUtil::Rectangle<u32> color_rect{};
- Surface color_surface;
- if (using_color_fb) {
- color_surface = GetSurface(color_params, preserve_contents);
- if (color_surface) {
- color_rect = color_surface->GetSurfaceParams().GetRect();
- }
+ if (index >= regs.rt_control.count) {
+ return {};
}
- MathUtil::Rectangle<u32> depth_rect{};
- Surface depth_surface;
- if (using_depth_fb) {
- depth_surface = GetSurface(depth_params, preserve_contents);
- if (depth_surface) {
- depth_rect = depth_surface->GetSurfaceParams().GetRect();
- }
+ if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
+ return {};
}
- MathUtil::Rectangle<u32> fb_rect{};
- if (color_surface && depth_surface) {
- fb_rect = color_rect;
- // Color and Depth surfaces must have the same dimensions and offsets
- if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
- color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
- color_surface = GetSurface(color_params);
- depth_surface = GetSurface(depth_params);
- fb_rect = color_surface->GetSurfaceParams().GetRect();
- }
- } else if (color_surface) {
- fb_rect = color_rect;
- } else if (depth_surface) {
- fb_rect = depth_rect;
- }
+ const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
- return std::make_tuple(color_surface, depth_surface, fb_rect);
+ return GetSurface(color_params, preserve_contents);
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
@@ -748,7 +751,6 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
}
void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
- surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->FlushGLBuffer();
}
@@ -806,27 +808,26 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
// Get a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{GetUncachedSurface(new_params)};
- // If format is unchanged, we can do a faster blit without reinterpreting pixel data
- if (params.pixel_format == new_params.pixel_format) {
+ if (params.pixel_format == new_params.pixel_format ||
+ !Settings::values.use_accurate_framebuffers) {
+ // If the format is unchanged, or accurate framebuffers are disabled, do a framebuffer blit.
+ // This is significantly faster than using PBOs. It is also likely less accurate, as textures
+ // of a different format will be converted rather than reinterpreted.
+
BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
- new_surface->GetSurfaceParams().GetRect(), params.type,
- read_framebuffer.handle, draw_framebuffer.handle);
- return new_surface;
- }
+ params.GetRect(), params.type, read_framebuffer.handle,
+ draw_framebuffer.handle);
+ } else {
+ // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
+ // where pixels are reinterpreted as a new format (without conversion). This code path uses
+ // OpenGL PBOs and is quite slow.
- // When using accurate framebuffers, always copy old data to new surface, regardless of format
- if (Settings::values.use_accurate_framebuffers) {
auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
- // Use a Pixel Buffer Object to download the previous texture and then upload it to the new
- // one using the new format.
- OGLBuffer pbo;
- pbo.Create();
-
- glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
if (source_format.compressed) {
glGetCompressedTextureImage(surface->Texture().handle, 0,
@@ -845,8 +846,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
// of the data in this case. Games like Super Mario Odyssey seem to hit this case
// when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
// but it doesn't clear it beforehand, the texture is already full of zeros.
- LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during "
- "reinterpretation but the texture is tiled.");
+ LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
+ "reinterpretation but the texture is tiled.");
}
size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
std::vector<u8> data(remaining_size);
@@ -859,21 +860,38 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
const auto& dest_rect{new_params.GetRect()};
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo.handle);
if (dest_format.compressed) {
- glCompressedTexSubImage2D(
- GL_TEXTURE_2D, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()),
- static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
- static_cast<GLsizei>(new_params.SizeInBytes()), nullptr);
+ LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
+ UNREACHABLE();
} else {
- glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
- static_cast<GLsizei>(dest_rect.GetWidth()),
- static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
- dest_format.type, nullptr);
+ switch (new_params.target) {
+ case SurfaceParams::SurfaceTarget::Texture1D:
+ glTextureSubImage1D(new_surface->Texture().handle, 0, 0,
+ static_cast<GLsizei>(dest_rect.GetWidth()), dest_format.format,
+ dest_format.type, nullptr);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
+ static_cast<GLsizei>(dest_rect.GetWidth()),
+ static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
+ dest_format.type, nullptr);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glTextureSubImage3D(new_surface->Texture().handle, 0, 0, 0, 0,
+ static_cast<GLsizei>(dest_rect.GetWidth()),
+ static_cast<GLsizei>(dest_rect.GetHeight()),
+ static_cast<GLsizei>(new_params.depth), dest_format.format,
+ dest_format.type, nullptr);
+ break;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ }
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
- pbo.Release();
}
return new_surface;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index aad75f200..57ea8593b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -109,6 +109,33 @@ struct SurfaceParams {
Invalid = 4,
};
+ enum class SurfaceTarget {
+ Texture1D,
+ Texture2D,
+ Texture3D,
+ Texture1DArray,
+ Texture2DArray,
+ TextureCubemap,
+ };
+
+ static SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) {
+ switch (texture_type) {
+ case Tegra::Texture::TextureType::Texture1D:
+ return SurfaceTarget::Texture1D;
+ case Tegra::Texture::TextureType::Texture2D:
+ case Tegra::Texture::TextureType::Texture2DNoMipmap:
+ return SurfaceTarget::Texture2D;
+ case Tegra::Texture::TextureType::Texture1DArray:
+ return SurfaceTarget::Texture1DArray;
+ case Tegra::Texture::TextureType::Texture2DArray:
+ return SurfaceTarget::Texture2DArray;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
+ UNREACHABLE();
+ return SurfaceTarget::Texture2D;
+ }
+ }
+
/**
* Gets the compression factor for the specified PixelFormat. This applies to just the
* "compressed width" and "compressed height", not the overall compression factor of a
@@ -270,6 +297,7 @@ struct SurfaceParams {
return PixelFormat::ABGR8S;
case Tegra::RenderTargetFormat::RGBA8_UINT:
return PixelFormat::ABGR8UI;
+ case Tegra::RenderTargetFormat::BGRA8_SRGB:
case Tegra::RenderTargetFormat::BGRA8_UNORM:
return PixelFormat::BGRA8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
@@ -542,6 +570,7 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
case Tegra::RenderTargetFormat::RGBA8_SRGB:
case Tegra::RenderTargetFormat::BGRA8_UNORM:
+ case Tegra::RenderTargetFormat::BGRA8_SRGB:
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
case Tegra::RenderTargetFormat::R8_UNORM:
case Tegra::RenderTargetFormat::RG16_UNORM:
@@ -635,15 +664,14 @@ struct SurfaceParams {
ASSERT(width % compression_factor == 0);
ASSERT(height % compression_factor == 0);
return (width / compression_factor) * (height / compression_factor) *
- GetFormatBpp(pixel_format) / CHAR_BIT;
+ GetFormatBpp(pixel_format) * depth / CHAR_BIT;
}
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
/// Creates SurfaceParams from a framebuffer configuration
- static SurfaceParams CreateForFramebuffer(
- const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
+ static SurfaceParams CreateForFramebuffer(size_t index);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -652,8 +680,8 @@ struct SurfaceParams {
/// Checks if surfaces are compatible for caching
bool IsCompatibleSurface(const SurfaceParams& other) const {
- return std::tie(pixel_format, type, cache_width, cache_height) ==
- std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height);
+ return std::tie(pixel_format, type, width, height) ==
+ std::tie(other.pixel_format, other.type, other.width, other.height);
}
VAddr addr;
@@ -664,12 +692,10 @@ struct SurfaceParams {
SurfaceType type;
u32 width;
u32 height;
+ u32 depth;
u32 unaligned_height;
size_t size_in_bytes;
-
- // Parameters used for caching only
- u32 cache_width;
- u32 cache_height;
+ SurfaceTarget target;
};
}; // namespace OpenGL
@@ -709,6 +735,10 @@ public:
return texture;
}
+ GLenum Target() const {
+ return gl_target;
+ }
+
static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
if (format == SurfaceParams::PixelFormat::Invalid)
return 0;
@@ -724,14 +754,14 @@ public:
void LoadGLBuffer();
void FlushGLBuffer();
- // Upload/Download data in gl_buffer in/to this surface's texture
+ // Upload data in gl_buffer to this surface's texture
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
- void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
private:
OGLTexture texture;
std::vector<u8> gl_buffer;
SurfaceParams params;
+ GLenum gl_target;
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -741,9 +771,11 @@ public:
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
- /// Get the color and depth surfaces based on the framebuffer configuration
- SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
- bool preserve_contents);
+ /// Get the depth surface based on the framebuffer configuration
+ Surface GetDepthBufferSurface(bool preserve_contents);
+
+ /// Get the color surface based on the framebuffer configuration and the specified render target
+ Surface GetColorBufferSurface(size_t index, bool preserve_contents);
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
@@ -774,6 +806,10 @@ private:
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
+
+ /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
+ /// using the new format.
+ OGLBuffer copy_pbo;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7e4b85ac3..61080f5cc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -13,8 +13,8 @@ namespace OpenGL {
/// Gets the address for the specified shader stage program
static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
shader_config.offset);
}
@@ -86,7 +86,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
}
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
- auto search{resource_cache.find(buffer.GetHash())};
+ const auto search{resource_cache.find(buffer.GetHash())};
if (search == resource_cache.end()) {
const GLuint index{
glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
@@ -98,7 +98,7 @@ GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& b
}
GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
- auto search{uniform_cache.find(sampler.GetHash())};
+ const auto search{uniform_cache.find(sampler.GetHash())};
if (search == uniform_cache.end()) {
const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
uniform_cache[sampler.GetHash()] = index;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 841647ebe..2d56370c7 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -113,7 +113,7 @@ private:
/// Scans a range of code for labels and determines the exit method.
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
- auto [iter, inserted] =
+ const auto [iter, inserted] =
exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
ExitMethod& exit_method = iter->second;
if (!inserted)
@@ -131,22 +131,22 @@ private:
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
return exit_method = ExitMethod::AlwaysEnd;
} else {
- ExitMethod not_met = Scan(offset + 1, end, labels);
+ const ExitMethod not_met = Scan(offset + 1, end, labels);
return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
}
}
case OpCode::Id::BRA: {
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
- ExitMethod no_jmp = Scan(offset + 1, end, labels);
- ExitMethod jmp = Scan(target, end, labels);
+ const ExitMethod no_jmp = Scan(offset + 1, end, labels);
+ const ExitMethod jmp = Scan(target, end, labels);
return exit_method = ParallelExit(no_jmp, jmp);
}
case OpCode::Id::SSY: {
// The SSY instruction uses a similar encoding as the BRA instruction.
ASSERT_MSG(instr.bra.constant_buffer == 0,
"Constant buffer SSY is not supported");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
// Continue scanning for an exit method.
break;
@@ -346,8 +346,8 @@ public:
*/
void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
const Tegra::Shader::IpaMode& input_mode) {
- std::string dest = GetRegisterAsFloat(reg);
- std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
+ const std::string dest = GetRegisterAsFloat(reg);
+ const std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
shader.AddLine(dest + " = " + src + ';');
}
@@ -359,8 +359,8 @@ public:
* @param reg The register to use as the source value.
*/
void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
- std::string dest = GetOutputAttribute(attribute);
- std::string src = GetRegisterAsFloat(reg);
+ const std::string dest = GetOutputAttribute(attribute);
+ const std::string src = GetRegisterAsFloat(reg);
if (!dest.empty()) {
// Can happen with unknown/unimplemented output attributes, in which case we ignore the
@@ -393,9 +393,9 @@ public:
GLSLRegister::Type type) {
declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
- std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
- std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
- final_offset + " % 4]";
+ const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
+ const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
+ final_offset + " % 4]";
if (type == GLSLRegister::Type::Float) {
return value;
@@ -443,13 +443,12 @@ public:
}
declarations.AddNewLine();
- // Append the sampler2D array for the used textures.
- const size_t num_samplers = used_samplers.size();
- if (num_samplers > 0) {
- declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
- std::to_string(num_samplers) + "];");
- declarations.AddNewLine();
+ const auto& samplers = GetSamplers();
+ for (const auto& sampler : samplers) {
+ declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
+ ';');
}
+ declarations.AddNewLine();
}
/// Returns a list of constant buffer declarations
@@ -461,27 +460,29 @@ public:
}
/// Returns a list of samplers used in the shader
- std::vector<SamplerEntry> GetSamplers() const {
+ const std::vector<SamplerEntry>& GetSamplers() const {
return used_samplers;
}
/// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
/// necessary.
- std::string AccessSampler(const Sampler& sampler) {
- size_t offset = static_cast<size_t>(sampler.index.Value());
+ std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
+ bool is_array) {
+ const size_t offset = static_cast<size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
- auto itr =
+ const auto itr =
std::find_if(used_samplers.begin(), used_samplers.end(),
[&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
if (itr != used_samplers.end()) {
+ ASSERT(itr->GetType() == type && itr->IsArray() == is_array);
return itr->GetName();
}
// Otherwise create a new mapping for this sampler
- size_t next_index = used_samplers.size();
- SamplerEntry entry{stage, offset, next_index};
+ const size_t next_index = used_samplers.size();
+ const SamplerEntry entry{stage, offset, next_index, type, is_array};
used_samplers.emplace_back(entry);
return entry.GetName();
}
@@ -698,7 +699,7 @@ private:
};
bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
- u32 bit = render_target * 4 + component;
+ const u32 bit = render_target * 4 + component;
return enabled_color_outputs & (1 << bit);
}
};
@@ -706,7 +707,7 @@ private:
/// Gets the Subroutine object corresponding to the specified address.
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
- auto iter = subroutines.find(Subroutine{begin, end, suffix});
+ const auto iter = subroutines.find(Subroutine{begin, end, suffix});
ASSERT(iter != subroutines.end());
return *iter;
}
@@ -722,8 +723,8 @@ private:
}
/// Generates code representing a texture sampler.
- std::string GetSampler(const Sampler& sampler) {
- return regs.AccessSampler(sampler);
+ std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) {
+ return regs.AccessSampler(sampler, type, is_array);
}
/**
@@ -751,7 +752,7 @@ private:
// Can't assign to the constant predicate.
ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
- std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
+ const std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
shader.AddLine(variable + " = " + value + ';');
declr_predicates.insert(std::move(variable));
}
@@ -1022,7 +1023,7 @@ private:
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
if (header.IsColorComponentOutputEnabled(render_target, component)) {
- shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component,
+ shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
regs.GetRegisterAsFloat(current_reg)));
++current_reg;
}
@@ -1032,7 +1033,11 @@ private:
if (header.writes_depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
- shader.AddLine("gl_FragDepth = " + regs.GetRegisterAsFloat(current_reg + 1) + ';');
+
+ shader.AddLine(
+ "gl_FragDepth = " +
+ regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) +
+ ';');
}
}
@@ -1434,7 +1439,7 @@ private:
if (instr.alu_integer.negate_b)
op_b = "-(" + op_b + ')';
- std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
+ const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
regs.SetRegisterToInteger(instr.gpr0, true, 0,
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
@@ -1452,7 +1457,7 @@ private:
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
- std::string condition =
+ const std::string condition =
GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
regs.SetRegisterToInteger(instr.gpr0, true, 0,
'(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
@@ -1474,8 +1479,9 @@ private:
case OpCode::Id::LOP3_C:
case OpCode::Id::LOP3_R:
case OpCode::Id::LOP3_IMM: {
- std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
+ const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
std::string lut;
+
if (opcode->GetId() == OpCode::Id::LOP3_R) {
lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
} else {
@@ -1490,15 +1496,82 @@ private:
case OpCode::Id::IMNMX_IMM: {
ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None,
"Unimplemented");
- std::string condition =
+ const std::string condition =
GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
- std::string parameters = op_a + ',' + op_b;
+ const std::string parameters = op_a + ',' + op_b;
regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
'(' + condition + ") ? min(" + parameters + ") : max(" +
parameters + ')',
1, 1);
break;
}
+ case OpCode::Id::LEA_R2:
+ case OpCode::Id::LEA_R1:
+ case OpCode::Id::LEA_IMM:
+ case OpCode::Id::LEA_RZ:
+ case OpCode::Id::LEA_HI: {
+ std::string op_a;
+ std::string op_b;
+ std::string op_c;
+
+ switch (opcode->GetId()) {
+ case OpCode::Id::LEA_R2: {
+ op_a = regs.GetRegisterAsInteger(instr.gpr20);
+ op_b = regs.GetRegisterAsInteger(instr.gpr39);
+ op_c = std::to_string(instr.lea.r2.entry_a);
+ break;
+ }
+
+ case OpCode::Id::LEA_R1: {
+ const bool neg = instr.lea.r1.neg != 0;
+ op_a = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_a = "-(" + op_a + ')';
+ op_b = regs.GetRegisterAsInteger(instr.gpr20);
+ op_c = std::to_string(instr.lea.r1.entry_a);
+ break;
+ }
+
+ case OpCode::Id::LEA_IMM: {
+ const bool neg = instr.lea.imm.neg != 0;
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_b = "-(" + op_b + ')';
+ op_a = std::to_string(instr.lea.imm.entry_a);
+ op_c = std::to_string(instr.lea.imm.entry_b);
+ break;
+ }
+
+ case OpCode::Id::LEA_RZ: {
+ const bool neg = instr.lea.rz.neg != 0;
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_b = "-(" + op_b + ')';
+ op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
+ GLSLRegister::Type::Integer);
+ op_c = std::to_string(instr.lea.rz.entry_a);
+
+ break;
+ }
+
+ case OpCode::Id::LEA_HI:
+ default: {
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ op_a = std::to_string(instr.lea.imm.entry_a);
+ op_c = std::to_string(instr.lea.imm.entry_b);
+ LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+ if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) {
+ LOG_ERROR(HW_GPU, "Unhandled LEA Predicate");
+ UNREACHABLE();
+ }
+ const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
+
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
opcode->GetName());
@@ -1509,7 +1582,7 @@ private:
break;
}
case OpCode::Type::Ffma: {
- std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
std::string op_b = instr.ffma.negate_b ? "-" : "";
std::string op_c = instr.ffma.negate_c ? "-" : "";
@@ -1719,7 +1792,7 @@ private:
shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
" / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
- std::string op_a =
+ const std::string op_a =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
GLSLRegister::Type::Float);
@@ -1729,7 +1802,7 @@ private:
break;
case Tegra::Shader::UniformType::Double: {
- std::string op_b =
+ const std::string op_b =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
"index", GLSLRegister::Type::Float);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
@@ -1753,17 +1826,74 @@ private:
break;
}
case OpCode::Id::TEX: {
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented");
+ Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
+ std::string coord;
+
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture1D: {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ coord = "float coords = " + x + ';';
+ break;
+ }
+ case Tegra::Shader::TextureType::Texture2D: {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+
+ // Fallback to interpreting as a 2D texture for now
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ }
+ // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
+ // or lod.
+ const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
+
+ const std::string sampler = GetSampler(instr.sampler, texture_type, false);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
+
shader.AddLine("{");
++shader.scope;
shader.AddLine(coord);
- const std::string texture = "texture(" + sampler + ", coords)";
+ std::string texture;
+ switch (instr.tex.process_mode) {
+ case Tegra::Shader::TextureProcessMode::None: {
+ texture = "texture(" + sampler + ", coords)";
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LZ: {
+ texture = "textureLod(" + sampler + ", coords, 0.0)";
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LB:
+ case Tegra::Shader::TextureProcessMode::LBA: {
+ // TODO: Figure out if the A suffix changes the equation at all.
+ texture = "texture(" + sampler + ", coords, " + op_c + ')';
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LL:
+ case Tegra::Shader::TextureProcessMode::LLA: {
+ // TODO: Figure out if the A suffix changes the equation at all.
+ texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
+ break;
+ }
+ default: {
+ texture = "texture(" + sampler + ", coords)";
+ LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}",
+ static_cast<u32>(instr.tex.process_mode.Value()));
+ UNREACHABLE();
+ }
+ }
size_t dest_elem{};
for (size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
@@ -1778,20 +1908,65 @@ private:
break;
}
case OpCode::Id::TEXS: {
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ std::string coord;
+ Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
+ bool is_array{instr.texs.IsArrayTexture()};
+
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ if (is_array) {
+ const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
+ } else {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ }
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+ // Fallback to interpreting as a 2D texture for now
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ is_array = false;
+ }
+ const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
const std::string texture = "texture(" + sampler + ", coords)";
WriteTexsInstruction(instr, coord, texture);
break;
}
case OpCode::Id::TLDS: {
- const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsInteger(instr.gpr20);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "ivec2 coords = ivec2(" + op_a + ", " + op_b + ");";
+ ASSERT(instr.tlds.GetTextureType() == Tegra::Shader::TextureType::Texture2D);
+ ASSERT(instr.tlds.IsArrayTexture() == false);
+ std::string coord;
+
+ switch (instr.tlds.GetTextureType()) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ if (instr.tlds.IsArrayTexture()) {
+ LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture");
+ UNREACHABLE();
+ } else {
+ const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
+ const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
+ coord = "ivec2 coords = ivec2(" + x + ", " + y + ");";
+ }
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(instr.tlds.GetTextureType()));
+ UNREACHABLE();
+ }
+ const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(),
+ instr.tlds.IsArrayTexture());
const std::string texture = "texelFetch(" + sampler + ", coords, 0)";
WriteTexsInstruction(instr, coord, texture);
break;
@@ -1799,12 +1974,12 @@ private:
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
ASSERT(instr.tld4.array == 0);
- std::string coord{};
+ std::string coord;
switch (instr.tld4.texture_type) {
case Tegra::Shader::TextureType::Texture2D: {
- std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
coord = "vec2 coords = vec2(" + x + ", " + y + ");";
break;
}
@@ -1814,7 +1989,8 @@ private:
UNREACHABLE();
}
- const std::string sampler = GetSampler(instr.sampler);
+ const std::string sampler =
+ GetSampler(instr.sampler, instr.tld4.texture_type, false);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
shader.AddLine("{");
@@ -1840,13 +2016,82 @@ private:
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
- const std::string sampler = GetSampler(instr.sampler);
+ const std::string sampler =
+ GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
const std::string texture = "textureGather(" + sampler + ", coords, " +
std::to_string(instr.tld4s.component) + ')';
WriteTexsInstruction(instr, coord, texture);
break;
}
+ case OpCode::Id::TXQ: {
+ // TODO: The texture refactor commits change the way samplers work.
+ // Sadly, not all texture instructions specify the type of texture their sampler
+ // uses. This must be fixed at a later time.
+ const std::string sampler =
+ GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
+ switch (instr.txq.query_type) {
+ case Tegra::Shader::TextureQueryType::Dimension: {
+ const std::string texture = "textureQueryLevels(" + sampler + ')';
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, texture, 1, 1);
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled texture query type: {}",
+ static_cast<u32>(instr.txq.query_type.Value()));
+ UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case OpCode::Id::TMML: {
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const bool is_array = instr.tmml.array != 0;
+ auto texture_type = instr.tmml.texture_type.Value();
+ const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
+
+ // TODO: add coordinates for different samplers once other texture types are
+ // implemented.
+ std::string coord;
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture1D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ coord = "float coords = " + x + ';';
+ break;
+ }
+ case Tegra::Shader::TextureType::Texture2D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+
+ // Fallback to interpreting as a 2D texture for now
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ }
+ // Add an extra scope and declare the texture coords inside to prevent
+ // overwriting them in case they are used as outputs of the texs instruction.
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine(coord);
+ const std::string texture = "textureQueryLod(" + sampler + ", coords)";
+ const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);";
+ shader.AddLine(tmp);
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
+ regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
+ --shader.scope;
+ shader.AddLine('}');
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
UNREACHABLE();
@@ -1886,12 +2131,12 @@ private:
// We can't use the constant predicate as destination.
ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.fsetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.fsetp.op);
- std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
+ const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.fsetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1905,7 +2150,8 @@ private:
break;
}
case OpCode::Type::IntegerSetPredicate: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
+ const std::string op_a =
+ regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
std::string op_b;
if (instr.is_b_imm) {
@@ -1922,12 +2168,12 @@ private:
// We can't use the constant predicate as destination.
ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.isetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.isetp.op);
- std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
+ const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.isetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1940,21 +2186,45 @@ private:
}
break;
}
+ case OpCode::Type::PredicateSetRegister: {
+ const std::string op_a =
+ GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+ const std::string op_b =
+ GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+
+ const std::string second_pred =
+ GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+ const std::string combiner = GetPredicateCombiner(instr.pset.op);
+
+ const std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
+ const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
+ if (instr.pset.bf == 0) {
+ const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
+ regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1);
+ } else {
+ const std::string value = '(' + result + ") ? 1.0 : 0.0";
+ regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
+ }
+
+ break;
+ }
case OpCode::Type::PredicateSetPredicate: {
- std::string op_a =
+ const std::string op_a =
GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
- std::string op_b =
+ const std::string op_b =
GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
// We can't use the constant predicate as destination.
ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
- std::string combiner = GetPredicateCombiner(instr.psetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.psetp.op);
- std::string predicate =
+ const std::string predicate =
'(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
// Set the primary predicate to the result of Predicate OP SecondPredicate
@@ -1980,7 +2250,7 @@ private:
std::string op_b = instr.fset.neg_b ? "-" : "";
if (instr.is_b_imm) {
- std::string imm = GetImmediate19(instr);
+ const std::string imm = GetImmediate19(instr);
if (instr.fset.neg_imm)
op_b += "(-" + imm + ')';
else
@@ -2000,13 +2270,14 @@ private:
// The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
// condition is true, and to 0 otherwise.
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.fset.op);
+ const std::string combiner = GetPredicateCombiner(instr.fset.op);
- std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
- ") " + combiner + " (" + second_pred + "))";
+ const std::string predicate = "((" +
+ GetPredicateComparison(instr.fset.cond, op_a, op_b) +
+ ") " + combiner + " (" + second_pred + "))";
if (instr.fset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -2017,7 +2288,7 @@ private:
break;
}
case OpCode::Type::IntegerSet: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
+ const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
std::string op_b;
@@ -2034,13 +2305,14 @@ private:
// The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
// condition is true, and to 0 otherwise.
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.iset.op);
+ const std::string combiner = GetPredicateCombiner(instr.iset.op);
- std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
- ") " + combiner + " (" + second_pred + "))";
+ const std::string predicate = "((" +
+ GetPredicateComparison(instr.iset.cond, op_a, op_b) +
+ ") " + combiner + " (" + second_pred + "))";
if (instr.iset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -2190,7 +2462,7 @@ private:
case OpCode::Id::BRA: {
ASSERT_MSG(instr.bra.constant_buffer == 0,
"BRA with constant buffers are not implemented");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
break;
}
@@ -2214,7 +2486,7 @@ private:
// has a similar structure to the BRA opcode.
ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
EmitPushToSSYStack(target);
break;
}
@@ -2308,10 +2580,10 @@ private:
shader.AddLine("case " + std::to_string(label) + "u: {");
++shader.scope;
- auto next_it = labels.lower_bound(label + 1);
- u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+ const auto next_it = labels.lower_bound(label + 1);
+ const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
- u32 compile_end = CompileRange(label, next_label);
+ const u32 compile_end = CompileRange(label, next_label);
if (compile_end > next_label && compile_end != PROGRAM_END) {
// This happens only when there is a label inside a IF/LOOP block
shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
@@ -2374,7 +2646,8 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix) {
try {
- auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
+ const auto subroutines =
+ ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
} catch (const DecompileFail& exception) {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index e1b1a9d73..b0466c18f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -88,7 +88,14 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
.get_value_or({});
out += R"(
in vec4 position;
-layout(location = 0) out vec4 color[8];
+layout(location = 0) out vec4 FragColor0;
+layout(location = 1) out vec4 FragColor1;
+layout(location = 2) out vec4 FragColor2;
+layout(location = 3) out vec4 FragColor3;
+layout(location = 4) out vec4 FragColor4;
+layout(location = 5) out vec4 FragColor5;
+layout(location = 6) out vec4 FragColor6;
+layout(location = 7) out vec4 FragColor7;
layout (std140) uniform fs_config {
vec4 viewport_flip;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index cbb2090ea..a43e2997b 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -9,6 +9,7 @@
#include <vector>
#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
namespace OpenGL::GLShader {
@@ -73,8 +74,9 @@ class SamplerEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
- SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
- : offset(offset), stage(stage), sampler_index(index) {}
+ SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index,
+ Tegra::Shader::TextureType type, bool is_array)
+ : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {}
size_t GetOffset() const {
return offset;
@@ -89,8 +91,41 @@ public:
}
std::string GetName() const {
- return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' +
- std::to_string(sampler_index) + ']';
+ return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' +
+ std::to_string(sampler_index);
+ }
+
+ std::string GetTypeString() const {
+ using Tegra::Shader::TextureType;
+ std::string glsl_type;
+
+ switch (type) {
+ case TextureType::Texture1D:
+ glsl_type = "sampler1D";
+ break;
+ case TextureType::Texture2D:
+ glsl_type = "sampler2D";
+ break;
+ case TextureType::Texture3D:
+ glsl_type = "sampler3D";
+ break;
+ case TextureType::TextureCube:
+ glsl_type = "samplerCube";
+ break;
+ default:
+ UNIMPLEMENTED();
+ }
+ if (is_array)
+ glsl_type += "Array";
+ return glsl_type;
+ }
+
+ Tegra::Shader::TextureType GetType() const {
+ return type;
+ }
+
+ bool IsArray() const {
+ return is_array;
}
u32 GetHash() const {
@@ -105,11 +140,14 @@ private:
static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
"tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
};
+
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling
/// instruction.
size_t offset;
- Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
- size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
+ Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
+ size_t sampler_index; ///< Index appended to the stage prefix to name the generated GLSL sampler.
+ Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
+ bool is_array; ///< Whether the texture is being sampled as an array texture or not.
};
struct ShaderEntries {
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5781d9d16..5f3fe067e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -25,7 +25,7 @@ GLuint LoadShader(const char* source, GLenum type) {
default:
UNREACHABLE();
}
- GLuint shader_id = glCreateShader(type);
+ const GLuint shader_id = glCreateShader(type);
glShaderSource(shader_id, 1, &source, nullptr);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
glCompileShader(shader_id);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 60a4defd1..6f70deb96 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -200,9 +200,9 @@ void OpenGLState::Apply() const {
const auto& texture_unit = texture_units[i];
const auto& cur_state_texture_unit = cur_state.texture_units[i];
- if (texture_unit.texture_2d != cur_state_texture_unit.texture_2d) {
+ if (texture_unit.texture != cur_state_texture_unit.texture) {
glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
- glBindTexture(GL_TEXTURE_2D, texture_unit.texture_2d);
+ glBindTexture(texture_unit.target, texture_unit.texture);
}
if (texture_unit.sampler != cur_state_texture_unit.sampler) {
glBindSampler(static_cast<GLuint>(i), texture_unit.sampler);
@@ -214,7 +214,7 @@ void OpenGLState::Apply() const {
texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
texture_unit.swizzle.b, texture_unit.swizzle.a};
- glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
+ glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
}
}
@@ -287,7 +287,7 @@ void OpenGLState::Apply() const {
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
for (auto& unit : texture_units) {
- if (unit.texture_2d == handle) {
+ if (unit.texture == handle) {
unit.Unbind();
}
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 46e96a97d..e3e24b9e7 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -94,8 +94,9 @@ public:
// 3 texture units - one for each that is used in PICA fragment shader emulation
struct TextureUnit {
- GLuint texture_2d; // GL_TEXTURE_BINDING_2D
- GLuint sampler; // GL_SAMPLER_BINDING
+ GLuint texture; // Texture handle bound to this unit's target
+ GLuint sampler; // GL_SAMPLER_BINDING
+ GLenum target; // Texture target passed to glBindTexture (GL_TEXTURE_2D after Reset)
struct {
GLint r; // GL_TEXTURE_SWIZZLE_R
GLint g; // GL_TEXTURE_SWIZZLE_G
@@ -104,7 +105,7 @@ public:
} swizzle;
void Unbind() {
- texture_2d = 0;
+ texture = 0;
swizzle.r = GL_RED;
swizzle.g = GL_GREEN;
swizzle.b = GL_BLUE;
@@ -114,6 +115,7 @@ public:
void Reset() {
Unbind();
sampler = 0;
+ target = GL_TEXTURE_2D;
}
};
std::array<TextureUnit, 32> texture_units;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e565afcee..aadf68f16 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -29,7 +29,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coh
if (GLAD_GL_ARB_buffer_storage) {
persistent = true;
coherent = prefer_coherent;
- GLbitfield flags =
+ const GLbitfield flags =
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glBufferStorage(gl_target, allocate_size, nullptr, flags);
mapped_ptr = static_cast<u8*>(glMapBufferRange(
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 411a73d50..96d916b07 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -177,7 +177,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
Memory::GetPointer(framebuffer_addr),
gl_framebuffer_data.data(), true);
- state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+ state.texture_units[0].texture = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
@@ -194,7 +194,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
}
}
@@ -205,7 +205,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
*/
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture) {
- state.texture_units[0].texture_2d = texture.resource.handle;
+ state.texture_units[0].texture = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
@@ -214,7 +214,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
// Update existing texture
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
}
@@ -260,7 +260,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Allocation of storage is deferred until the first frame, when we
// know the framebuffer size.
- state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+ state.texture_units[0].texture = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
@@ -272,7 +272,7 @@ void RendererOpenGL::InitOpenGLObjects() {
screen_info.display_texture = screen_info.texture.resource.handle;
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
// Clear screen to black
@@ -305,14 +305,14 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
UNREACHABLE();
}
- state.texture_units[0].texture_2d = texture.resource.handle;
+ state.texture_units[0].texture = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
texture.gl_format, texture.gl_type, nullptr);
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
}
@@ -354,14 +354,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
}};
- state.texture_units[0].texture_2d = screen_info.display_texture;
+ state.texture_units[0].texture = screen_info.display_texture;
state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
state.Apply();
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
}
@@ -369,6 +369,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
* Draws the emulated screens to the emulator window.
*/
void RendererOpenGL::DrawScreen() {
+ if (renderer_settings.set_background_color) {
+ // Update background color before drawing
+ glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
+ 0.0f);
+ }
+
const auto& layout = render_window.GetFramebufferLayout();
const auto& screen = layout.screen;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c6bd2f4b9..c2fb824b2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -170,8 +170,12 @@ struct TICEntry {
BitField<0, 16, u32> width_minus_1;
BitField<23, 4, TextureType> texture_type;
};
- u16 height_minus_1;
- INSERT_PADDING_BYTES(10);
+ union {
+ BitField<0, 16, u32> height_minus_1;
+ BitField<16, 15, u32> depth_minus_1;
+ };
+
+ INSERT_PADDING_BYTES(8);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
@@ -192,6 +196,10 @@ struct TICEntry {
return height_minus_1 + 1;
}
+ u32 Depth() const {
+ return depth_minus_1 + 1;
+ }
+
u32 BlockHeight() const {
ASSERT(header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey);