diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_processor.cpp | 170 | ||||
| -rw-r--r-- | src/video_core/command_processor.h | 17 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 22 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 105 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 8 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 191 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 160 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 21 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 316 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_util.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.cpp | 2 |
18 files changed, 678 insertions, 393 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d5831e752..2625ddfdc 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -28,98 +28,106 @@ enum class BufferMethods { CountBufferMethods = 0x40, }; -void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) { - LOG_TRACE(HW_GPU, - "Processing method {:08X} on subchannel {} value " - "{:08X} remaining params {}", - method, subchannel, value, remaining_params); - - ASSERT(subchannel < bound_engines.size()); - - if (method == static_cast<u32>(BufferMethods::BindObject)) { - // Bind the current subchannel to the desired engine id. - LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value); - bound_engines[subchannel] = static_cast<EngineID>(value); - return; - } +MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192)); - if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) { - // TODO(Subv): Research and implement these methods. - LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); - return; - } +void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { + MICROPROFILE_SCOPE(ProcessCommandLists); - const EngineID engine = bound_engines[subchannel]; - - switch (engine) { - case EngineID::FERMI_TWOD_A: - fermi_2d->WriteReg(method, value); - break; - case EngineID::MAXWELL_B: - maxwell_3d->WriteReg(method, value, remaining_params); - break; - case EngineID::MAXWELL_COMPUTE_B: - maxwell_compute->WriteReg(method, value); - break; - case EngineID::MAXWELL_DMA_COPY_A: - maxwell_dma->WriteReg(method, value); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented engine"); - } -} + auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { + LOG_TRACE(HW_GPU, + "Processing method {:08X} on subchannel {} value " + "{:08X} remaining params {}", + method, subchannel, value, remaining_params); -void GPU::ProcessCommandList(GPUVAddr address, u32 size) { - const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); - VAddr current_addr = *head_address; - while (current_addr < *head_address + size * sizeof(CommandHeader)) { - const CommandHeader header = {Memory::Read32(current_addr)}; - current_addr += sizeof(u32); - - switch (header.mode.Value()) { - case SubmissionMode::IncreasingOld: - case SubmissionMode::Increasing: { - // Increase the method value with each argument. - for (unsigned i = 0; i < header.arg_count; ++i) { - WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); - current_addr += sizeof(u32); - } - break; + ASSERT(subchannel < bound_engines.size()); + + if (method == static_cast<u32>(BufferMethods::BindObject)) { + // Bind the current subchannel to the desired engine id. + LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value); + bound_engines[subchannel] = static_cast<EngineID>(value); + return; } - case SubmissionMode::NonIncreasingOld: - case SubmissionMode::NonIncreasing: { - // Use the same method value for all arguments. - for (unsigned i = 0; i < header.arg_count; ++i) { - WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); - current_addr += sizeof(u32); - } + + if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) { + // TODO(Subv): Research and implement these methods. + LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); + return; + } + + const EngineID engine = bound_engines[subchannel]; + + switch (engine) { + case EngineID::FERMI_TWOD_A: + fermi_2d->WriteReg(method, value); + break; + case EngineID::MAXWELL_B: + maxwell_3d->WriteReg(method, value, remaining_params); break; + case EngineID::MAXWELL_COMPUTE_B: + maxwell_compute->WriteReg(method, value); + break; + case EngineID::MAXWELL_DMA_COPY_A: + maxwell_dma->WriteReg(method, value); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented engine"); } - case SubmissionMode::IncreaseOnce: { - ASSERT(header.arg_count.Value() >= 1); + }; - // Use the original method for the first argument and then the next method for all other - // arguments. - WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), - header.arg_count - 1); + for (auto entry : commands) { + Tegra::GPUVAddr address = entry.Address(); + u32 size = entry.sz; + const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); + VAddr current_addr = *head_address; + while (current_addr < *head_address + size * sizeof(CommandHeader)) { + const CommandHeader header = {Memory::Read32(current_addr)}; current_addr += sizeof(u32); - for (unsigned i = 1; i < header.arg_count; ++i) { - WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); + switch (header.mode.Value()) { + case SubmissionMode::IncreasingOld: + case SubmissionMode::Increasing: { + // Increase the method value with each argument. + for (unsigned i = 0; i < header.arg_count; ++i) { + WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr), + header.arg_count - i - 1); + current_addr += sizeof(u32); + } + break; + } + case SubmissionMode::NonIncreasingOld: + case SubmissionMode::NonIncreasing: { + // Use the same method value for all arguments. + for (unsigned i = 0; i < header.arg_count; ++i) { + WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), + header.arg_count - i - 1); + current_addr += sizeof(u32); + } + break; + } + case SubmissionMode::IncreaseOnce: { + ASSERT(header.arg_count.Value() >= 1); + + // Use the original method for the first argument and then the next method for all + // other arguments. + WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), + header.arg_count - 1); current_addr += sizeof(u32); + + for (unsigned i = 1; i < header.arg_count; ++i) { + WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr), + header.arg_count - i - 1); + current_addr += sizeof(u32); + } + break; + } + case SubmissionMode::Inline: { + // The register value is stored in the bits 16-28 as an immediate + WriteReg(header.method, header.subchannel, header.inline_data, 0); + break; + } + default: + UNIMPLEMENTED(); } - break; - } - case SubmissionMode::Inline: { - // The register value is stored in the bits 16-28 as an immediate - WriteReg(header.method, header.subchannel, header.inline_data, 0); - break; - } - default: - UNIMPLEMENTED(); } } } diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index a01153e0b..bd766e77a 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h @@ -7,6 +7,7 @@ #include <type_traits> #include "common/bit_field.h" #include "common/common_types.h" +#include "video_core/memory_manager.h" namespace Tegra { @@ -19,6 +20,22 @@ enum class SubmissionMode : u32 { IncreaseOnce = 5 }; +struct CommandListHeader { + u32 entry0; // gpu_va_lo + union { + u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F) + BitField<0, 8, u32> gpu_va_hi; + BitField<8, 2, u32> unk1; + BitField<10, 21, u32> sz; + BitField<31, 1, u32> unk2; + }; + + GPUVAddr Address() const { + return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0; + } +}; +static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size"); + union CommandHeader { u32 hex; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 1308080b5..329079ddd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -135,8 +135,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { break; } - rasterizer.NotifyMaxwellRegisterChanged(method); - if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index f59d01738..d3be900a4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -533,7 +533,11 @@ public: u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_PADDING_WORDS(0x20); + INSERT_PADDING_WORDS(0x13); + + u32 rt_separate_frag_data; + + INSERT_PADDING_WORDS(0xC); struct { u32 address_high; @@ -557,7 +561,22 @@ public: struct { union { BitField<0, 4, u32> count; + BitField<4, 3, u32> map_0; + BitField<7, 3, u32> map_1; + BitField<10, 3, u32> map_2; + BitField<13, 3, u32> map_3; + BitField<16, 3, u32> map_4; + BitField<19, 3, u32> map_5; + BitField<22, 3, u32> map_6; + BitField<25, 3, u32> map_7; }; + + u32 GetMap(size_t index) const { + const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, + map_4, map_5, map_6, map_7}; + ASSERT(index < maps.size()); + return maps[index]; + } } rt_control; INSERT_PADDING_WORDS(0x2); @@ -968,6 +987,7 @@ ASSERT_REG_POSITION(clear_stencil, 0x368); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); +ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(rt_control, 0x487); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 6e740713f..c24d33d5c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -41,7 +41,6 @@ void MaxwellDMA::HandleCopy() { // TODO(Subv): Perform more research and implement all features of this engine. ASSERT(regs.exec.enable_swizzle == 0); - ASSERT(regs.exec.enable_2d == 1); ASSERT(regs.exec.query_mode == Regs::QueryMode::None); ASSERT(regs.exec.query_intr == Regs::QueryIntr::None); ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2); @@ -51,10 +50,19 @@ void MaxwellDMA::HandleCopy() { ASSERT(regs.dst_params.pos_y == 0); if (regs.exec.is_dst_linear == regs.exec.is_src_linear) { - Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count * regs.y_count); + size_t copy_size = regs.x_count; + + // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D + // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count). + if (regs.exec.enable_2d) { + copy_size = copy_size * regs.y_count; + } + + Memory::CopyBlock(dest_cpu, source_cpu, copy_size); return; } + ASSERT(regs.exec.enable_2d == 1); u8* src_buffer = Memory::GetPointer(source_cpu); u8* dst_buffer = Memory::GetPointer(dest_cpu); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d2388673e..2db906ea5 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -244,6 +244,16 @@ enum class TextureType : u64 { TextureCube = 3, }; +enum class TextureQueryType : u64 { + Dimension = 1, + TextureType = 2, + SamplePosition = 5, + Filter = 16, + LevelOfDetail = 18, + Wrap = 20, + BorderColor = 22, +}; + enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; @@ -414,6 +424,45 @@ union Instruction { } bfe; union { + BitField<48, 3, u64> pred48; + + union { + BitField<20, 20, u64> entry_a; + BitField<39, 5, u64> entry_b; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> uses_cc; + } imm; + + union { + BitField<20, 14, u64> cb_index; + BitField<34, 5, u64> cb_offset; + BitField<56, 1, u64> neg; + BitField<57, 1, u64> uses_cc; + } hi; + + union { + BitField<20, 14, u64> cb_index; + BitField<34, 5, u64> cb_offset; + BitField<39, 5, u64> entry_a; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> uses_cc; + } rz; + + union { + BitField<39, 5, u64> entry_a; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> uses_cc; + } r1; + + union { + BitField<28, 8, u64> entry_a; + BitField<37, 1, u64> neg; + BitField<38, 1, u64> uses_cc; + } r2; + + } lea; + + union { BitField<0, 5, FlowCondition> cond; } flow; @@ -468,6 +517,18 @@ union Instruction { } psetp; union { + BitField<12, 3, u64> pred12; + BitField<15, 1, u64> neg_pred12; + BitField<24, 2, PredOperation> cond; + BitField<29, 3, u64> pred29; + BitField<32, 1, u64> neg_pred29; + BitField<39, 3, u64> pred39; + BitField<42, 1, u64> neg_pred39; + BitField<44, 1, u64> bf; + BitField<45, 2, PredOperation> op; + } pset; + + union { BitField<39, 3, u64> pred39; BitField<42, 1, u64> neg_pred; BitField<43, 1, u64> neg_a; @@ -519,6 +580,21 @@ union Instruction { } tex; union { + BitField<22, 6, TextureQueryType> query_type; + BitField<31, 4, u64> component_mask; + } txq; + + union { + BitField<28, 1, u64> array; + BitField<29, 2, TextureType> texture_type; + BitField<31, 4, u64> component_mask; + + bool IsComponentEnabled(size_t component) const { + return ((1ull << component) & component_mask) != 0; + } + } tmml; + + union { BitField<28, 1, u64> array; BitField<29, 2, TextureType> texture_type; BitField<56, 2, u64> component; @@ -670,11 +746,13 @@ public: LDG, // Load from global memory STG, // Store in global memory TEX, - TEXQ, // Texture Query - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Load 4 - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations + TXQ, // Texture Query + TEXS, // Texture Fetch with scalar/non-vec4 source/destinations + TLDS, // Texture Load with scalar/non-vec4 source/destinations + TLD4, // Texture Load 4 + TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations + TMML_B, // Texture Mip Map Level + TMML, // Texture Mip Map Level EXIT, IPA, FFMA_IMM, // Fused Multiply and Add @@ -699,6 +777,11 @@ public: ISCADD_C, // Scale and Add ISCADD_R, ISCADD_IMM, + LEA_R1, + LEA_R2, + LEA_RZ, + LEA_IMM, + LEA_HI, POPC_C, POPC_R, POPC_IMM, @@ -757,6 +840,7 @@ public: ISET_C, ISET_IMM, PSETP, + PSET, XMAD_IMM, XMAD_CR, XMAD_RC, @@ -780,6 +864,7 @@ public: IntegerSet, IntegerSetPredicate, PredicateSetPredicate, + PredicateSetRegister, Conversion, Xmad, Unknown, @@ -894,11 +979,13 @@ private: INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), - INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), + INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), + INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), + INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), @@ -929,6 +1016,11 @@ private: INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), + INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), + INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), + INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), + INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), + INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), @@ -983,6 +1075,7 @@ private: INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), + INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e6d8e65c6..86a809f86 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -66,6 +66,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RGBA8_UINT: case RenderTargetFormat::RGB10_A2_UNORM: case RenderTargetFormat::BGRA8_UNORM: + case RenderTargetFormat::BGRA8_SRGB: case RenderTargetFormat::RG16_UNORM: case RenderTargetFormat::RG16_SNORM: case RenderTargetFormat::RG16_UINT: diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index d29f31f52..589a59b4f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -6,6 +6,7 @@ #include <array> #include <memory> +#include <vector> #include "common/common_types.h" #include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/memory_manager.h" @@ -26,6 +27,7 @@ enum class RenderTargetFormat : u32 { RG32_FLOAT = 0xCB, RG32_UINT = 0xCD, BGRA8_UNORM = 0xCF, + BGRA8_SRGB = 0xD0, RGB10_A2_UNORM = 0xD1, RGBA8_UNORM = 0xD5, RGBA8_SRGB = 0xD6, @@ -67,6 +69,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format); /// Returns the number of bytes per pixel of each depth format. u32 DepthFormatBytesPerPixel(DepthFormat format); +struct CommandListHeader; class DebugContext; /** @@ -115,7 +118,7 @@ public: ~GPU(); /// Processes a command list stored at the specified address in GPU memory. - void ProcessCommandList(GPUVAddr address, u32 size); + void ProcessCommandLists(const std::vector<CommandListHeader>& commands); /// Returns a reference to the Maxwell3D GPU engine. Engines::Maxwell3D& Maxwell3D(); @@ -130,9 +133,6 @@ public: const Tegra::MemoryManager& MemoryManager() const; private: - /// Writes a single register in the engine bound to the specified subchannel - void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params); - std::unique_ptr<Tegra::MemoryManager> memory_manager; /// Mapping of command subchannels to their bound engine ids. diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 9d78e8b6b..cd819d69f 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -20,9 +20,6 @@ public: /// Clear the current framebuffer virtual void Clear() = 0; - /// Notify rasterizer that the specified Maxwell register has been changed - virtual void NotifyMaxwellRegisterChanged(u32 method) = 0; - /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e6d6917fa..c59f3af1b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -294,61 +294,80 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { cached_pages.add({pages_interval, delta}); } -std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, - bool using_depth_fb, - bool preserve_contents) { +void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, + bool preserve_contents, + boost::optional<size_t> single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) { - LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured"); - using_color_fb = false; + Surface depth_surface; + if (using_depth_fb) { + depth_surface = res_cache.GetDepthBufferSurface(preserve_contents); } - const bool has_stencil = regs.stencil_enable; - const bool write_color_fb = - state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || - state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; - - const bool write_depth_fb = - (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || - (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask)); + // TODO(bunnei): Figure out how the below register works. According to envytools, this should be + // used to enable multiple render targets. However, it is left unset on all games that I have + // tested. + ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented"); - Surface color_surface; - Surface depth_surface; - MathUtil::Rectangle<u32> surfaces_rect; - std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents); + // Bind the framebuffer surfaces + state.draw.draw_framebuffer = framebuffer.handle; + state.Apply(); - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - const MathUtil::Rectangle<u32> draw_rect{ - static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + if (using_color_fb) { + if (single_color_target) { + // Used when just a single color attachment is enabled, e.g. for clearing a color buffer + Surface color_surface = + res_cache.GetColorBufferSurface(*single_color_target, preserve_contents); + glFramebufferTexture2D( + GL_DRAW_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D, + color_surface != nullptr ? color_surface->Texture().handle : 0, 0); + glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target)); + } else { + // Multiple color attachments are enabled + std::array<GLenum, Maxwell::NumRenderTargets> buffers; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); + buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); + glFramebufferTexture2D( + GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), + GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0, + 0); + } + glDrawBuffers(regs.rt_control.count, buffers.data()); + } + } else { + // No color attachments are enabled - zero out all of them + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D, + 0, 0); + } + glDrawBuffer(GL_NONE); + } - // Bind the framebuffer surfaces - BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); + if (depth_surface) { + if (regs.stencil_enable) { + // Attach both depth and stencil + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->Texture().handle, 0); + } else { + // Attach depth + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->Texture().handle, 0); + // Clear stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } + } else { + // Clear both depth and stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } - SyncViewport(surfaces_rect); + SyncViewport(); - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable - // scissor test to prevent drawing outside of the framebuffer region - state.scissor.enabled = true; - state.scissor.x = draw_rect.left; - state.scissor.y = draw_rect.bottom; - state.scissor.width = draw_rect.GetWidth(); - state.scissor.height = draw_rect.GetHeight(); state.Apply(); - - // Only return the surface to be marked as dirty if writing to it is enabled. - return std::make_pair(write_color_fb ? color_surface : nullptr, - write_depth_fb ? depth_surface : nullptr); } void RasterizerOpenGL::Clear() { @@ -356,8 +375,9 @@ void RasterizerOpenGL::Clear() { SCOPE_EXIT({ prev_state.Apply(); }); const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - bool use_color_fb = false; - bool use_depth_fb = false; + bool use_color{}; + bool use_depth{}; + bool use_stencil{}; OpenGLState clear_state; clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer; @@ -366,22 +386,13 @@ void RasterizerOpenGL::Clear() { clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; - GLbitfield clear_mask{}; if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A) { - if (regs.clear_buffers.RT == 0) { - // We only support clearing the first color attachment for now - clear_mask |= GL_COLOR_BUFFER_BIT; - use_color_fb = true; - } else { - // TODO(subv): Add support for the other color attachments - LOG_CRITICAL(HW_GPU, "Clear unimplemented for RT {}", regs.clear_buffers.RT); - } + use_color = true; } if (regs.clear_buffers.Z) { ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); - use_depth_fb = true; - clear_mask |= GL_DEPTH_BUFFER_BIT; + use_depth = true; // Always enable the depth write when clearing the depth buffer. The depth write mask is // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true. @@ -390,34 +401,33 @@ void RasterizerOpenGL::Clear() { } if (regs.clear_buffers.S) { ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); - use_depth_fb = true; - clear_mask |= GL_STENCIL_BUFFER_BIT; + use_stencil = true; clear_state.stencil.test_enabled = true; } - if (!use_color_fb && !use_depth_fb) { + if (!use_color && !use_depth && !use_stencil) { // No color surface nor depth/stencil surface are enabled return; } - if (clear_mask == 0) { - // No clear mask is enabled - return; - } - ScopeAcquireGLContext acquire_context{emu_window}; - auto [dirty_color_surface, dirty_depth_surface] = - ConfigureFramebuffers(use_color_fb, use_depth_fb, false); + ConfigureFramebuffers(use_color, use_depth || use_stencil, false, + regs.clear_buffers.RT.Value()); clear_state.Apply(); - glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], - regs.clear_color[3]); - glClearDepth(regs.clear_depth); - glClearStencil(regs.clear_stencil); + if (use_color) { + glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); + } - glClear(clear_mask); + if (use_depth && use_stencil) { + glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); + } else if (use_depth) { + glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); + } else if (use_stencil) { + glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); + } } void RasterizerOpenGL::DrawArrays() { @@ -430,8 +440,7 @@ void RasterizerOpenGL::DrawArrays() { ScopeAcquireGLContext acquire_context{emu_window}; - const auto [dirty_color_surface, dirty_depth_surface] = - ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true); + ConfigureFramebuffers(); SyncDepthTestState(); SyncStencilTestState(); @@ -525,8 +534,6 @@ void RasterizerOpenGL::DrawArrays() { state.Apply(); } -void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} - void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {} @@ -729,38 +736,12 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, return current_unit + static_cast<u32>(entries.size()); } -void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, - const Surface& depth_surface, bool has_stencil) { - state.draw.draw_framebuffer = framebuffer.handle; - state.Apply(); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - color_surface != nullptr ? color_surface->Texture().handle : 0, 0); - if (depth_surface != nullptr) { - if (has_stencil) { - // attach both depth and stencil - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->Texture().handle, 0); - } else { - // attach depth - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->Texture().handle, 0); - // clear stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - } - } else { - // clear both depth and stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - } -} - -void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) { +void RasterizerOpenGL::SyncViewport() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left; - state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom; + state.viewport.x = viewport_rect.left; + state.viewport.y = viewport_rect.bottom; state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth()); state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight()); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index c6bb1516b..745c3dc0c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -13,6 +13,7 @@ #include <vector> #include <boost/icl/interval_map.hpp> +#include <boost/optional.hpp> #include <boost/range/iterator_range.hpp> #include <glad/glad.h> @@ -45,7 +46,6 @@ public: void DrawArrays() override; void Clear() override; - void NotifyMaxwellRegisterChanged(u32 method) override; void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; @@ -97,14 +97,16 @@ private: GLvec4 border_color; }; - /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth> - /// surfaces if writing was enabled. - std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, - bool preserve_contents); - - /// Binds the framebuffer color and depth surface - void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, - bool has_stencil); + /** + * Configures the color and depth framebuffer states. + * @param use_color_fb If true, configure color framebuffers. + * @param using_depth_fb If true, configure the depth/stencil framebuffer. + * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. + * @param single_color_target Specifies if a single color buffer target should be used. + */ + void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true, + bool preserve_contents = true, + boost::optional<size_t> single_color_target = {}); /* * Configures the current constbuffers to use for the draw command. @@ -127,7 +129,7 @@ private: u32 current_unit); /// Syncs the viewport to match the guest state - void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect); + void SyncViewport(); /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 360fb0cd5..fb56decc0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -52,17 +52,31 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.type = GetFormatType(params.pixel_format); params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); - params.depth = config.tic.Depth(); params.unaligned_height = config.tic.Height(); - params.size_in_bytes = params.SizeInBytes(); - params.cache_width = Common::AlignUp(params.width, 8); - params.cache_height = Common::AlignUp(params.height, 8); params.target = SurfaceTargetFromTextureType(config.tic.texture_type); + + switch (params.target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture2D: + params.depth = 1; + break; + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + params.depth = config.tic.Depth(); + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target)); + UNREACHABLE(); + params.depth = 1; + break; + } + + params.size_in_bytes = params.SizeInBytes(); return params; } -/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( - const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { +/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) { + const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; SurfaceParams params{}; params.addr = TryGetCpuAddr(config.Address()); params.is_tiled = true; @@ -72,12 +86,10 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.type = GetFormatType(params.pixel_format); params.width = config.width; params.height = config.height; - params.depth = 1; params.unaligned_height = config.height; - params.size_in_bytes = params.SizeInBytes(); - params.cache_width = Common::AlignUp(params.width, 8); - params.cache_height = Common::AlignUp(params.height, 8); params.target = SurfaceTarget::Texture2D; + params.depth = 1; + params.size_in_bytes = params.SizeInBytes(); return params; } @@ -93,12 +105,10 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.type = GetFormatType(params.pixel_format); params.width = zeta_width; params.height = zeta_height; - params.depth = 1; params.unaligned_height = zeta_height; - params.size_in_bytes = params.SizeInBytes(); - params.cache_width = Common::AlignUp(params.width, 8); - params.cache_height = Common::AlignUp(params.height, 8); params.target = SurfaceTarget::Texture2D; + params.depth = 1; + params.size_in_bytes = params.SizeInBytes(); return params; } @@ -461,30 +471,27 @@ CachedSurface::CachedSurface(const SurfaceParams& params) // Only pre-create the texture for non-compressed textures. switch (params.target) { case SurfaceParams::SurfaceTarget::Texture1D: - glTexImage1D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format, - rect.GetWidth(), 0, format_tuple.format, format_tuple.type, nullptr); + glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, + rect.GetWidth()); break; case SurfaceParams::SurfaceTarget::Texture2D: - glTexImage2D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format, - rect.GetWidth(), rect.GetHeight(), 0, format_tuple.format, - format_tuple.type, nullptr); + glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, + rect.GetWidth(), rect.GetHeight()); break; case SurfaceParams::SurfaceTarget::Texture3D: case SurfaceParams::SurfaceTarget::Texture2DArray: - glTexImage3D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format, - rect.GetWidth(), rect.GetHeight(), params.depth, 0, format_tuple.format, - format_tuple.type, nullptr); + glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, + rect.GetWidth(), rect.GetHeight(), params.depth); break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast<u32>(params.target)); UNREACHABLE(); - glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, rect.GetWidth(), - rect.GetHeight(), 0, format_tuple.format, format_tuple.type, nullptr); + glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(), + rect.GetHeight()); } } - glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); @@ -505,7 +512,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { S8Z24 input_pixel{}; Z24S8 output_pixel{}; - const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; + constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; for (size_t y = 0; y < height; ++y) { for (size_t x = 0; x < width; ++x) { const size_t offset{bpp * (y * width + x)}; @@ -518,7 +525,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { } static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { - const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; + constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; for (size_t y = 0; y < height; ++y) { for (size_t x = 0; x < width; ++x) { const size_t offset{bpp * (y * width + x)}; @@ -584,12 +591,13 @@ void CachedSurface::LoadGLBuffer() { UNREACHABLE(); } - gl_buffer.resize(params.depth * copy_size); + gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size); morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( params.width, params.block_height, params.height, gl_buffer.data(), copy_size, params.addr); } else { - const u8* const texture_src_data_end{texture_src_data + (params.depth * copy_size)}; + const u8* const texture_src_data_end{texture_src_data + + (static_cast<size_t>(params.depth) * copy_size)}; gl_buffer.assign(texture_src_data, texture_src_data_end); } @@ -608,18 +616,20 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle MICROPROFILE_SCOPE(OpenGL_TextureUL); - ASSERT(gl_buffer.size() == - params.width * params.height * GetGLBytesPerPixel(params.pixel_format) * params.depth); + ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height * + GetGLBytesPerPixel(params.pixel_format) * params.depth); const auto& rect{params.GetRect()}; // Load data from memory to the surface - GLint x0 = static_cast<GLint>(rect.left); - GLint y0 = static_cast<GLint>(rect.bottom); - size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format); + const GLint x0 = static_cast<GLint>(rect.left); + const GLint y0 = static_cast<GLint>(rect.bottom); + const size_t buffer_offset = + static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) * + GetGLBytesPerPixel(params.pixel_format); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - GLuint target_tex = texture.handle; + const GLuint target_tex = texture.handle; OpenGLState cur_state = OpenGLState::GetCurState(); const auto& old_tex = cur_state.texture_units[0]; @@ -705,62 +715,34 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu return GetSurface(SurfaceParams::CreateForTexture(config)); } -SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, - bool using_depth_fb, - bool preserve_contents) { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; +Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { + const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs}; + if (!regs.zeta.Address() || !regs.zeta_enable) { + return {}; + } - // TODO(bunnei): This is hard corded to use just the first render buffer - LOG_TRACE(Render_OpenGL, "hard-coded for render target 0!"); + SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer( + regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)}; - // get color and depth surfaces - SurfaceParams color_params{}; - SurfaceParams depth_params{}; + return GetSurface(depth_params, preserve_contents); +} - if (using_color_fb) { - color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]); - } +Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) { + const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs}; - if (using_depth_fb) { - depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height, - regs.zeta.Address(), regs.zeta.format); - } + ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - MathUtil::Rectangle<u32> color_rect{}; - Surface color_surface; - if (using_color_fb) { - color_surface = GetSurface(color_params, preserve_contents); - if (color_surface) { - color_rect = color_surface->GetSurfaceParams().GetRect(); - } + if (index >= regs.rt_control.count) { + return {}; } - MathUtil::Rectangle<u32> depth_rect{}; - Surface depth_surface; - if (using_depth_fb) { - depth_surface = GetSurface(depth_params, preserve_contents); - if (depth_surface) { - depth_rect = depth_surface->GetSurfaceParams().GetRect(); - } + if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + return {}; } - MathUtil::Rectangle<u32> fb_rect{}; - if (color_surface && depth_surface) { - fb_rect = color_rect; - // Color and Depth surfaces must have the same dimensions and offsets - if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || - color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params); - depth_surface = GetSurface(depth_params); - fb_rect = color_surface->GetSurfaceParams().GetRect(); - } - } else if (color_surface) { - fb_rect = color_rect; - } else if (depth_surface) { - fb_rect = depth_rect; - } + const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; - return std::make_tuple(color_surface, depth_surface, fb_rect); + return GetSurface(color_params, preserve_contents); } void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { @@ -826,16 +808,20 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, // Get a new surface with the new parameters, and blit the previous surface to it Surface new_surface{GetUncachedSurface(new_params)}; - // If format is unchanged, we can do a faster blit without reinterpreting pixel data - if (params.pixel_format == new_params.pixel_format) { + if (params.pixel_format == new_params.pixel_format || + !Settings::values.use_accurate_framebuffers) { + // If the format is the same, just do a framebuffer blit. This is significantly faster than + // using PBOs. The is also likely less accurate, as textures will be converted rather than + // reinterpreted. + BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, params.GetRect(), params.type, read_framebuffer.handle, draw_framebuffer.handle); - return new_surface; - } + } else { + // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy, + // where pixels are reinterpreted as a new format (without conversion). This code path uses + // OpenGL PBOs and is quite slow. - // When using accurate framebuffers, always copy old data to new surface, regardless of format - if (Settings::values.use_accurate_framebuffers) { auto source_format = GetFormatTuple(params.pixel_format, params.component_type); auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 8312b2c7a..57ea8593b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -297,6 +297,7 @@ struct SurfaceParams { return PixelFormat::ABGR8S; case Tegra::RenderTargetFormat::RGBA8_UINT: return PixelFormat::ABGR8UI; + case Tegra::RenderTargetFormat::BGRA8_SRGB: case Tegra::RenderTargetFormat::BGRA8_UNORM: return PixelFormat::BGRA8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: @@ -569,6 +570,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGBA8_UNORM: case Tegra::RenderTargetFormat::RGBA8_SRGB: case Tegra::RenderTargetFormat::BGRA8_UNORM: + case Tegra::RenderTargetFormat::BGRA8_SRGB: case Tegra::RenderTargetFormat::RGB10_A2_UNORM: case Tegra::RenderTargetFormat::R8_UNORM: case Tegra::RenderTargetFormat::RG16_UNORM: @@ -669,8 +671,7 @@ struct SurfaceParams { static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); /// Creates SurfaceParams from a framebuffer configuration - static SurfaceParams CreateForFramebuffer( - const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); + static SurfaceParams CreateForFramebuffer(size_t index); /// Creates SurfaceParams for a depth buffer configuration static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, @@ -679,8 +680,8 @@ struct SurfaceParams { /// Checks if surfaces are compatible for caching bool IsCompatibleSurface(const SurfaceParams& other) const { - return std::tie(pixel_format, type, cache_width, cache_height) == - std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height); + return std::tie(pixel_format, type, width, height) == + std::tie(other.pixel_format, other.type, other.width, other.height); } VAddr addr; @@ -695,10 +696,6 @@ struct SurfaceParams { u32 unaligned_height; size_t size_in_bytes; SurfaceTarget target; - - // Parameters used for caching only - u32 cache_width; - u32 cache_height; }; }; // namespace OpenGL @@ -774,9 +771,11 @@ public: /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); - /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - bool preserve_contents); + /// Get the depth surface based on the framebuffer configuration + Surface GetDepthBufferSurface(bool preserve_contents); + + /// Get the color surface based on the framebuffer configuration and the specified render target + Surface GetColorBufferSurface(size_t index, bool preserve_contents); /// Flushes the surface to Switch memory void FlushSurface(const Surface& surface); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7e4b85ac3..61080f5cc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -13,8 +13,8 @@ namespace OpenGL { /// Gets the address for the specified shader stage program static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { - auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); - auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; + const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + shader_config.offset); } @@ -86,7 +86,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) } GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { - auto search{resource_cache.find(buffer.GetHash())}; + const auto search{resource_cache.find(buffer.GetHash())}; if (search == resource_cache.end()) { const GLuint index{ glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; @@ -98,7 +98,7 @@ GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& b } GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) { - auto search{uniform_cache.find(sampler.GetHash())}; + const auto search{uniform_cache.find(sampler.GetHash())}; if (search == uniform_cache.end()) { const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())}; uniform_cache[sampler.GetHash()] = index; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 172ba8335..582c811e0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -113,7 +113,7 @@ private: /// Scans a range of code for labels and determines the exit method. ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { - auto [iter, inserted] = + const auto [iter, inserted] = exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); ExitMethod& exit_method = iter->second; if (!inserted) @@ -131,22 +131,22 @@ private: if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { return exit_method = ExitMethod::AlwaysEnd; } else { - ExitMethod not_met = Scan(offset + 1, end, labels); + const ExitMethod not_met = Scan(offset + 1, end, labels); return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); } } case OpCode::Id::BRA: { - u32 target = offset + instr.bra.GetBranchTarget(); + const u32 target = offset + instr.bra.GetBranchTarget(); labels.insert(target); - ExitMethod no_jmp = Scan(offset + 1, end, labels); - ExitMethod jmp = Scan(target, end, labels); + const ExitMethod no_jmp = Scan(offset + 1, end, labels); + const ExitMethod jmp = Scan(target, end, labels); return exit_method = ParallelExit(no_jmp, jmp); } case OpCode::Id::SSY: { // The SSY instruction uses a similar encoding as the BRA instruction. ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported"); - u32 target = offset + instr.bra.GetBranchTarget(); + const u32 target = offset + instr.bra.GetBranchTarget(); labels.insert(target); // Continue scanning for an exit method. break; @@ -346,8 +346,8 @@ public: */ void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute, const Tegra::Shader::IpaMode& input_mode) { - std::string dest = GetRegisterAsFloat(reg); - std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem); + const std::string dest = GetRegisterAsFloat(reg); + const std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem); shader.AddLine(dest + " = " + src + ';'); } @@ -359,8 +359,8 @@ public: * @param reg The register to use as the source value. */ void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) { - std::string dest = GetOutputAttribute(attribute); - std::string src = GetRegisterAsFloat(reg); + const std::string dest = GetOutputAttribute(attribute); + const std::string src = GetRegisterAsFloat(reg); if (!dest.empty()) { // Can happen with unknown/unimplemented output attributes, in which case we ignore the @@ -393,9 +393,9 @@ public: GLSLRegister::Type type) { declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage); - std::string final_offset = fmt::format("({} + {})", index_str, offset / 4); - std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" + - final_offset + " % 4]"; + const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4); + const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" + + final_offset + " % 4]"; if (type == GLSLRegister::Type::Float) { return value; @@ -468,10 +468,10 @@ public: /// necessary. std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) { - size_t offset = static_cast<size_t>(sampler.index.Value()); + const size_t offset = static_cast<size_t>(sampler.index.Value()); // If this sampler has already been used, return the existing mapping. - auto itr = + const auto itr = std::find_if(used_samplers.begin(), used_samplers.end(), [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; }); @@ -481,8 +481,8 @@ public: } // Otherwise create a new mapping for this sampler - size_t next_index = used_samplers.size(); - SamplerEntry entry{stage, offset, next_index, type, is_array}; + const size_t next_index = used_samplers.size(); + const SamplerEntry entry{stage, offset, next_index, type, is_array}; used_samplers.emplace_back(entry); return entry.GetName(); } @@ -699,7 +699,7 @@ private: }; bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - u32 bit = render_target * 4 + component; + const u32 bit = render_target * 4 + component; return enabled_color_outputs & (1 << bit); } }; @@ -707,7 +707,7 @@ private: /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { - auto iter = subroutines.find(Subroutine{begin, end, suffix}); + const auto iter = subroutines.find(Subroutine{begin, end, suffix}); ASSERT(iter != subroutines.end()); return *iter; } @@ -752,7 +752,7 @@ private: // Can't assign to the constant predicate. ASSERT(pred != static_cast<u64>(Pred::UnusedIndex)); - std::string variable = 'p' + std::to_string(pred) + '_' + suffix; + const std::string variable = 'p' + std::to_string(pred) + '_' + suffix; shader.AddLine(variable + " = " + value + ';'); declr_predicates.insert(std::move(variable)); } @@ -1023,7 +1023,7 @@ private: // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { if (header.IsColorComponentOutputEnabled(render_target, component)) { - shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component, + shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, regs.GetRegisterAsFloat(current_reg))); ++current_reg; } @@ -1033,7 +1033,11 @@ private: if (header.writes_depth) { // The depth output is always 2 registers after the last color output, and current_reg // already contains one past the last color register. - shader.AddLine("gl_FragDepth = " + regs.GetRegisterAsFloat(current_reg + 1) + ';'); + + shader.AddLine( + "gl_FragDepth = " + + regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) + + ';'); } } @@ -1435,7 +1439,7 @@ private: if (instr.alu_integer.negate_b) op_b = "-(" + op_b + ')'; - std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); + const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); regs.SetRegisterToInteger(instr.gpr0, true, 0, "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1); @@ -1453,7 +1457,7 @@ private: case OpCode::Id::SEL_C: case OpCode::Id::SEL_R: case OpCode::Id::SEL_IMM: { - std::string condition = + const std::string condition = GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0); regs.SetRegisterToInteger(instr.gpr0, true, 0, '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1); @@ -1475,8 +1479,9 @@ private: case OpCode::Id::LOP3_C: case OpCode::Id::LOP3_R: case OpCode::Id::LOP3_IMM: { - std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); + const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); std::string lut; + if (opcode->GetId() == OpCode::Id::LOP3_R) { lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')'; } else { @@ -1491,15 +1496,82 @@ private: case OpCode::Id::IMNMX_IMM: { ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None, "Unimplemented"); - std::string condition = + const std::string condition = GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0); - std::string parameters = op_a + ',' + op_b; + const std::string parameters = op_a + ',' + op_b; regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0, '(' + condition + ") ? min(" + parameters + ") : max(" + parameters + ')', 1, 1); break; } + case OpCode::Id::LEA_R2: + case OpCode::Id::LEA_R1: + case OpCode::Id::LEA_IMM: + case OpCode::Id::LEA_RZ: + case OpCode::Id::LEA_HI: { + std::string op_a; + std::string op_b; + std::string op_c; + + switch (opcode->GetId()) { + case OpCode::Id::LEA_R2: { + op_a = regs.GetRegisterAsInteger(instr.gpr20); + op_b = regs.GetRegisterAsInteger(instr.gpr39); + op_c = std::to_string(instr.lea.r2.entry_a); + break; + } + + case OpCode::Id::LEA_R1: { + const bool neg = instr.lea.r1.neg != 0; + op_a = regs.GetRegisterAsInteger(instr.gpr8); + if (neg) + op_a = "-(" + op_a + ')'; + op_b = regs.GetRegisterAsInteger(instr.gpr20); + op_c = std::to_string(instr.lea.r1.entry_a); + break; + } + + case OpCode::Id::LEA_IMM: { + const bool neg = instr.lea.imm.neg != 0; + op_b = regs.GetRegisterAsInteger(instr.gpr8); + if (neg) + op_b = "-(" + op_b + ')'; + op_a = std::to_string(instr.lea.imm.entry_a); + op_c = std::to_string(instr.lea.imm.entry_b); + break; + } + + case OpCode::Id::LEA_RZ: { + const bool neg = instr.lea.rz.neg != 0; + op_b = regs.GetRegisterAsInteger(instr.gpr8); + if (neg) + op_b = "-(" + op_b + ')'; + op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset, + GLSLRegister::Type::Integer); + op_c = std::to_string(instr.lea.rz.entry_a); + + break; + } + + case OpCode::Id::LEA_HI: + default: { + op_b = regs.GetRegisterAsInteger(instr.gpr8); + op_a = std::to_string(instr.lea.imm.entry_a); + op_c = std::to_string(instr.lea.imm.entry_b); + LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) { + LOG_ERROR(HW_GPU, "Unhandled LEA Predicate"); + UNREACHABLE(); + } + const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))"; + regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1); + + break; + } default: { LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}", opcode->GetName()); @@ -1510,7 +1582,7 @@ private: break; } case OpCode::Type::Ffma: { - std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); std::string op_b = instr.ffma.negate_b ? "-" : ""; std::string op_c = instr.ffma.negate_c ? "-" : ""; @@ -1720,7 +1792,7 @@ private: shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); - std::string op_a = + const std::string op_a = regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index", GLSLRegister::Type::Float); @@ -1730,7 +1802,7 @@ private: break; case Tegra::Shader::UniformType::Double: { - std::string op_b = + const std::string op_b = regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, "index", GLSLRegister::Type::Float); regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); @@ -1760,13 +1832,13 @@ private: switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: { - std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); coord = "float coords = " + x + ';'; break; } case Tegra::Shader::TextureType::Texture2D: { - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); coord = "vec2 coords = vec2(" + x + ", " + y + ");"; break; } @@ -1776,8 +1848,8 @@ private: UNREACHABLE(); // Fallback to interpreting as a 2D texture for now - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); coord = "vec2 coords = vec2(" + x + ", " + y + ");"; texture_type = Tegra::Shader::TextureType::Texture2D; } @@ -1811,13 +1883,13 @@ private: switch (texture_type) { case Tegra::Shader::TextureType::Texture2D: { if (is_array) { - std::string index = regs.GetRegisterAsInteger(instr.gpr8); - std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - std::string y = regs.GetRegisterAsFloat(instr.gpr20); + const std::string index = regs.GetRegisterAsInteger(instr.gpr8); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string y = regs.GetRegisterAsFloat(instr.gpr20); coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; } else { - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr20); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr20); coord = "vec2 coords = vec2(" + x + ", " + y + ");"; } break; @@ -1828,8 +1900,8 @@ private: UNREACHABLE(); // Fallback to interpreting as a 2D texture for now - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr20); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr20); coord = "vec2 coords = vec2(" + x + ", " + y + ");"; texture_type = Tegra::Shader::TextureType::Texture2D; is_array = false; @@ -1850,8 +1922,8 @@ private: LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture"); UNREACHABLE(); } else { - std::string x = regs.GetRegisterAsInteger(instr.gpr8); - std::string y = regs.GetRegisterAsInteger(instr.gpr20); + const std::string x = regs.GetRegisterAsInteger(instr.gpr8); + const std::string y = regs.GetRegisterAsInteger(instr.gpr20); coord = "ivec2 coords = ivec2(" + x + ", " + y + ");"; } break; @@ -1874,8 +1946,8 @@ private: switch (instr.tld4.texture_type) { case Tegra::Shader::TextureType::Texture2D: { - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string x = regs.GetRegisterAsFloat(instr.gpr8); + const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); coord = "vec2 coords = vec2(" + x + ", " + y + ");"; break; } @@ -1920,6 +1992,74 @@ private: WriteTexsInstruction(instr, coord, texture); break; } + case OpCode::Id::TXQ: { + // TODO: the new commits on the texture refactor, change the way samplers work. + // Sadly, not all texture instructions specify the type of texture their sampler + // uses. This must be fixed at a later instance. + const std::string sampler = + GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false); + switch (instr.txq.query_type) { + case Tegra::Shader::TextureQueryType::Dimension: { + const std::string texture = "textureQueryLevels(" + sampler + ')'; + regs.SetRegisterToInteger(instr.gpr0, true, 0, texture, 1, 1); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled texture query type: {}", + static_cast<u32>(instr.txq.query_type.Value())); + UNREACHABLE(); + } + } + break; + } + case OpCode::Id::TMML: { + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const bool is_array = instr.tmml.array != 0; + auto texture_type = instr.tmml.texture_type.Value(); + const std::string sampler = GetSampler(instr.sampler, texture_type, is_array); + + // TODO: add coordinates for different samplers once other texture types are + // implemented. + std::string coord; + switch (texture_type) { + case Tegra::Shader::TextureType::Texture1D: { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + coord = "float coords = " + x + ';'; + break; + } + case Tegra::Shader::TextureType::Texture2D: { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", + static_cast<u32>(texture_type)); + UNREACHABLE(); + + // Fallback to interpreting as a 2D texture for now + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + texture_type = Tegra::Shader::TextureType::Texture2D; + } + // Add an extra scope and declare the texture coords inside to prevent + // overwriting them in case they are used as outputs of the texs instruction. + shader.AddLine('{'); + ++shader.scope; + shader.AddLine(coord); + const std::string texture = "textureQueryLod(" + sampler + ", coords)"; + const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);"; + shader.AddLine(tmp); + + regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1); + regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1); + --shader.scope; + shader.AddLine('}'); + break; + } default: { LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName()); UNREACHABLE(); @@ -1959,12 +2099,12 @@ private: // We can't use the constant predicate as destination. ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - std::string second_pred = + const std::string second_pred = GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - std::string combiner = GetPredicateCombiner(instr.fsetp.op); + const std::string combiner = GetPredicateCombiner(instr.fsetp.op); - std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b); + const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b); // Set the primary predicate to the result of Predicate OP SecondPredicate SetPredicate(instr.fsetp.pred3, '(' + predicate + ") " + combiner + " (" + second_pred + ')'); @@ -1978,7 +2118,8 @@ private: break; } case OpCode::Type::IntegerSetPredicate: { - std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed); + const std::string op_a = + regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed); std::string op_b; if (instr.is_b_imm) { @@ -1995,12 +2136,12 @@ private: // We can't use the constant predicate as destination. ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - std::string second_pred = + const std::string second_pred = GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0); - std::string combiner = GetPredicateCombiner(instr.isetp.op); + const std::string combiner = GetPredicateCombiner(instr.isetp.op); - std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b); + const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b); // Set the primary predicate to the result of Predicate OP SecondPredicate SetPredicate(instr.isetp.pred3, '(' + predicate + ") " + combiner + " (" + second_pred + ')'); @@ -2013,21 +2154,45 @@ private: } break; } + case OpCode::Type::PredicateSetRegister: { + const std::string op_a = + GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); + const std::string op_b = + GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0); + + const std::string second_pred = + GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0); + + const std::string combiner = GetPredicateCombiner(instr.pset.op); + + const std::string predicate = + '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')'; + const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')'; + if (instr.pset.bf == 0) { + const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0"; + regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1); + } else { + const std::string value = '(' + result + ") ? 1.0 : 0.0"; + regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1); + } + + break; + } case OpCode::Type::PredicateSetPredicate: { - std::string op_a = + const std::string op_a = GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - std::string op_b = + const std::string op_b = GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); // We can't use the constant predicate as destination. ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - std::string second_pred = + const std::string second_pred = GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - std::string combiner = GetPredicateCombiner(instr.psetp.op); + const std::string combiner = GetPredicateCombiner(instr.psetp.op); - std::string predicate = + const std::string predicate = '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; // Set the primary predicate to the result of Predicate OP SecondPredicate @@ -2053,7 +2218,7 @@ private: std::string op_b = instr.fset.neg_b ? "-" : ""; if (instr.is_b_imm) { - std::string imm = GetImmediate19(instr); + const std::string imm = GetImmediate19(instr); if (instr.fset.neg_imm) op_b += "(-" + imm + ')'; else @@ -2073,13 +2238,14 @@ private: // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the // condition is true, and to 0 otherwise. - std::string second_pred = + const std::string second_pred = GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0); - std::string combiner = GetPredicateCombiner(instr.fset.op); + const std::string combiner = GetPredicateCombiner(instr.fset.op); - std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) + - ") " + combiner + " (" + second_pred + "))"; + const std::string predicate = "((" + + GetPredicateComparison(instr.fset.cond, op_a, op_b) + + ") " + combiner + " (" + second_pred + "))"; if (instr.fset.bf) { regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); @@ -2090,7 +2256,7 @@ private: break; } case OpCode::Type::IntegerSet: { - std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed); + const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed); std::string op_b; @@ -2107,13 +2273,14 @@ private: // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the // condition is true, and to 0 otherwise. - std::string second_pred = + const std::string second_pred = GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0); - std::string combiner = GetPredicateCombiner(instr.iset.op); + const std::string combiner = GetPredicateCombiner(instr.iset.op); - std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) + - ") " + combiner + " (" + second_pred + "))"; + const std::string predicate = "((" + + GetPredicateComparison(instr.iset.cond, op_a, op_b) + + ") " + combiner + " (" + second_pred + "))"; if (instr.iset.bf) { regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); @@ -2263,7 +2430,7 @@ private: case OpCode::Id::BRA: { ASSERT_MSG(instr.bra.constant_buffer == 0, "BRA with constant buffers are not implemented"); - u32 target = offset + instr.bra.GetBranchTarget(); + const u32 target = offset + instr.bra.GetBranchTarget(); shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); break; } @@ -2287,7 +2454,7 @@ private: // has a similar structure to the BRA opcode. ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported"); - u32 target = offset + instr.bra.GetBranchTarget(); + const u32 target = offset + instr.bra.GetBranchTarget(); EmitPushToSSYStack(target); break; } @@ -2381,10 +2548,10 @@ private: shader.AddLine("case " + std::to_string(label) + "u: {"); ++shader.scope; - auto next_it = labels.lower_bound(label + 1); - u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; + const auto next_it = labels.lower_bound(label + 1); + const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; - u32 compile_end = CompileRange(label, next_label); + const u32 compile_end = CompileRange(label, next_label); if (compile_end > next_label && compile_end != PROGRAM_END) { // This happens only when there is a label inside a IF/LOOP block shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }"); @@ -2447,7 +2614,8 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) { try { - auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines(); + const auto subroutines = + ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines(); GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix); return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; } catch (const DecompileFail& exception) { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index e1b1a9d73..b0466c18f 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -88,7 +88,14 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { .get_value_or({}); out += R"( in vec4 position; -layout(location = 0) out vec4 color[8]; +layout(location = 0) out vec4 FragColor0; +layout(location = 1) out vec4 FragColor1; +layout(location = 2) out vec4 FragColor2; +layout(location = 3) out vec4 FragColor3; +layout(location = 4) out vec4 FragColor4; +layout(location = 5) out vec4 FragColor5; +layout(location = 6) out vec4 FragColor6; +layout(location = 7) out vec4 FragColor7; layout (std140) uniform fs_config { vec4 viewport_flip; diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5781d9d16..5f3fe067e 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -25,7 +25,7 @@ GLuint LoadShader(const char* source, GLenum type) { default: UNREACHABLE(); } - GLuint shader_id = glCreateShader(type); + const GLuint shader_id = glCreateShader(type); glShaderSource(shader_id, 1, &source, nullptr); LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); glCompileShader(shader_id); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index e565afcee..aadf68f16 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -29,7 +29,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coh if (GLAD_GL_ARB_buffer_storage) { persistent = true; coherent = prefer_coherent; - GLbitfield flags = + const GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); glBufferStorage(gl_target, allocate_size, nullptr, flags); mapped_ptr = static_cast<u8*>(glMapBufferRange( |
