From a994446b6ec776c9383e8b13c45eeb461405adff Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 23 Apr 2018 20:01:29 -0500 Subject: GPU: Move the Maxwell3D macro uploading code to the inside of the Maxwell3D processor. It doesn't belong in the PFIFO handler. --- src/video_core/engines/maxwell_3d.cpp | 14 ++++++++++---- src/video_core/engines/maxwell_3d.h | 17 +++++++++++++---- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2acbb9cd6..bc40f8d98 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -22,10 +22,6 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(MemoryManager& memory_manager) : memory_manager(memory_manager), macro_interpreter(*this) {} -void Maxwell3D::SubmitMacroCode(u32 entry, std::vector code) { - uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code); -} - void Maxwell3D::CallMacroMethod(u32 method, std::vector parameters) { auto macro_code = uploaded_macros.find(method); // The requested macro must have been uploaded already. @@ -75,6 +71,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { regs.reg_array[method] = value; switch (method) { + case MAXWELL3D_REG_INDEX(macros.data): { + ProcessMacroUpload(value); + break; + } case MAXWELL3D_REG_INDEX(code_address.code_address_high): case MAXWELL3D_REG_INDEX(code_address.code_address_low): { // Note: For some reason games (like Puyo Puyo Tetris) seem to write 0 to the CODE_ADDRESS @@ -141,6 +141,12 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { } } +void Maxwell3D::ProcessMacroUpload(u32 data) { + // Store the uploaded macro code to interpret them when they're called. + auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; + macro.push_back(data); +} + void Maxwell3D::ProcessQueryGet() { GPUVAddr sequence_address = regs.query.QueryAddress(); // Since the sequence address is given as a GPU VAddr, we have to convert it to an application diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a022665eb..8edc3cd38 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -322,7 +322,15 @@ public: union { struct { - INSERT_PADDING_WORDS(0x200); + INSERT_PADDING_WORDS(0x45); + + struct { + INSERT_PADDING_WORDS(1); + u32 data; + u32 entry; + } macros; + + INSERT_PADDING_WORDS(0x1B8); struct { u32 address_high; @@ -637,9 +645,6 @@ public: /// Write the value to the register identified by method. void WriteReg(u32 method, u32 value, u32 remaining_params); - /// Uploads the code for a GPU macro program associated with the specified entry. - void SubmitMacroCode(u32 entry, std::vector code); - /// Returns a list of enabled textures for the specified shader stage. std::vector GetStageTextures(Regs::ShaderStage stage) const; @@ -670,6 +675,9 @@ private: */ void CallMacroMethod(u32 method, std::vector parameters); + /// Handles writes to the macro uploading registers. + void ProcessMacroUpload(u32 data); + /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); @@ -687,6 +695,7 @@ private: static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") +ASSERT_REG_POSITION(macros, 0x45); ASSERT_REG_POSITION(rt, 0x200); ASSERT_REG_POSITION(viewport_transform[0], 0x280); ASSERT_REG_POSITION(viewport, 0x300); -- cgit v1.2.3 From c16cfbbc6c062491d84a6bc9976027b7a7587fdb Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 23 Apr 2018 20:03:50 -0500 Subject: GPU: Reduce the number of registers of Maxwell3D to 0xE00. The rest are just macro shim registers. --- src/video_core/engines/maxwell_3d.cpp | 6 +++--- src/video_core/engines/maxwell_3d.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index bc40f8d98..4306b894f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -33,9 +33,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector parameters) { } void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { - ASSERT_MSG(method < Regs::NUM_REGS, - "Invalid Maxwell3D register, increase the size of the Regs structure"); - auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); // It is an error to write to a register other than the current macro's ARG register before it @@ -64,6 +61,9 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { return; } + ASSERT_MSG(method < Regs::NUM_REGS, + "Invalid Maxwell3D register, increase the size of the Regs structure"); + if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8edc3cd38..5cf62fb01 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -31,7 +31,7 @@ public: /// Register structure of the Maxwell3D engine. /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. struct Regs { - static constexpr size_t NUM_REGS = 0xE36; + static constexpr size_t NUM_REGS = 0xE00; static constexpr size_t NumRenderTargets = 8; static constexpr size_t NumViewports = 16; @@ -613,7 +613,7 @@ public: u32 size[MaxShaderStage]; } tex_info_buffers; - INSERT_PADDING_WORDS(0x102); + INSERT_PADDING_WORDS(0xCC); }; std::array reg_array; }; -- cgit v1.2.3 From b1109931b9a92ce89635cb7c0c0c1c0c7e6866ed Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 23 Apr 2018 20:12:40 -0500 Subject: GPU: Added boilerplate code for the Fermi2D engine --- src/video_core/engines/fermi_2d.cpp | 7 ++++++- src/video_core/engines/fermi_2d.h | 28 +++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 7aab163dc..87634da21 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -7,7 +7,12 @@ namespace Tegra { namespace Engines { -void Fermi2D::WriteReg(u32 method, u32 value) {} +Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {} + +void Fermi2D::WriteReg(u32 method, u32 value) { + ASSERT_MSG(method < Regs::NUM_REGS, + "Invalid Fermi2D register, increase the size of the Regs structure"); +} } // namespace Engines } // namespace Tegra diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 8967ddede..a97f5bb28 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -4,19 +4,45 @@ #pragma once +#include +#include "common/assert.h" +#include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/memory_manager.h" namespace Tegra { namespace Engines { +#define FERMI2D_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) + class Fermi2D final { public: - Fermi2D() = default; + explicit Fermi2D(MemoryManager& memory_manager); ~Fermi2D() = default; /// Write the value to the register identified by method. void WriteReg(u32 method, u32 value); + + struct Regs { + static constexpr size_t NUM_REGS = 0x258; + + union { + struct { + INSERT_PADDING_WORDS(0x258); + }; + std::array reg_array; + }; + } regs{}; + + MemoryManager& memory_manager; }; +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ + "Field " #field_name " has invalid position") + +#undef ASSERT_REG_POSITION + } // namespace Engines } // namespace Tegra -- cgit v1.2.3 From 378c881427d34962461099ef3d55de871710b897 Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 23 Apr 2018 22:14:08 -0500 Subject: GPU: Added surface copy registers to Fermi2D --- src/video_core/engines/fermi_2d.h | 58 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index a97f5bb28..78d06218f 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -6,8 +6,10 @@ #include #include "common/assert.h" +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" namespace Tegra { @@ -27,9 +29,59 @@ public: struct Regs { static constexpr size_t NUM_REGS = 0x258; + struct Surface { + RenderTargetFormat format; + BitField<0, 1, u32> linear; + union { + BitField<0, 4, u32> block_depth; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_width; + }; + u32 depth; + u32 layer; + u32 pitch; + u32 width; + u32 height; + u32 address_high; + u32 address_low; + + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); + } + }; + static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); + + enum class Operation : u32 { + SrcCopyAnd = 0, + ROPAnd = 1, + Blend = 2, + SrcCopy = 3, + ROP = 4, + SrcCopyPremult = 5, + BlendPremult = 6, + }; + union { struct { - INSERT_PADDING_WORDS(0x258); + INSERT_PADDING_WORDS(0x80); + + Surface dst; + + INSERT_PADDING_WORDS(2); + + Surface src; + + INSERT_PADDING_WORDS(0x15); + + Operation operation; + + INSERT_PADDING_WORDS(0x9); + + // TODO(Subv): This is only a guess. + u32 trigger; + + INSERT_PADDING_WORDS(0x1A3); }; std::array reg_array; }; @@ -42,6 +94,10 @@ public: static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") +ASSERT_REG_POSITION(dst, 0x80); +ASSERT_REG_POSITION(src, 0x8C); +ASSERT_REG_POSITION(operation, 0xAB); +ASSERT_REG_POSITION(trigger, 0xB5); #undef ASSERT_REG_POSITION } // namespace Engines -- cgit v1.2.3 From 20d86d8a36dca4bf1b465193745a365c3ab9abcd Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 24 Apr 2018 22:00:40 -0500 Subject: GPU: Partially implemented the Fermi2D surface copy operation. The hardware allows for some rather complicated operations to be performed on the data during the copy, this is not implemented. Only same-format same-size raw copies are implemented for now. --- src/video_core/engines/fermi_2d.cpp | 54 +++++++++++++++++++++++++++++++++++++ src/video_core/engines/fermi_2d.h | 5 ++++ 2 files changed, 59 insertions(+) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 87634da21..9019f2504 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -2,7 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "core/memory.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/textures/decoders.h" namespace Tegra { namespace Engines { @@ -12,6 +14,58 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) void Fermi2D::WriteReg(u32 method, u32 value) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Fermi2D register, increase the size of the Regs structure"); + + regs.reg_array[method] = value; + + switch (method) { + case FERMI2D_REG_INDEX(trigger): { + HandleSurfaceCopy(); + break; + } + } +} + +void Fermi2D::HandleSurfaceCopy() { + NGLOG_WARNING(HW_GPU, "Requested a surface copy with operation {}", + static_cast(regs.operation)); + + const GPUVAddr source = regs.src.Address(); + const GPUVAddr dest = regs.dst.Address(); + + // TODO(Subv): Only same-format and same-size copies are allowed for now. + ASSERT(regs.src.format == regs.dst.format); + ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height); + + // TODO(Subv): Only raw copies are implemented. + ASSERT(regs.operation == Regs::Operation::SrcCopy); + + const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source); + const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest); + + u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); + u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); + + if (regs.src.linear == regs.dst.linear) { + // If the input layout and the output layout are the same, just perform a raw copy. + Memory::CopyBlock(dest_cpu, source_cpu, + src_bytes_per_pixel * regs.dst.width * regs.dst.height); + return; + } + + u8* src_buffer = Memory::GetPointer(source_cpu); + u8* dst_buffer = Memory::GetPointer(dest_cpu); + + if (!regs.src.linear && regs.dst.linear) { + // If the input is tiled and the output is linear, deswizzle the input and copy it over. + Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, src_buffer, dst_buffer, true, + regs.src.block_height); + } else { + // If the input is linear and the output is tiled, swizzle the input and copy it over. + Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, dst_buffer, src_buffer, false, + regs.dst.block_height); + } } } // namespace Engines diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 78d06218f..0c5b413cc 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -88,6 +88,11 @@ public: } regs{}; MemoryManager& memory_manager; + +private: + /// Performs the copy from the source surface to the destination surface as configured in the + /// registers. + void HandleSurfaceCopy(); }; #define ASSERT_REG_POSITION(field_name, position) \ -- cgit v1.2.3