From 2a24b1c9734a916e9a14579d4c550c84e83039b8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 2 Oct 2020 21:19:35 -0300 Subject: video_core: Enforce -Wunused-variable and -Wunused-but-set-variable --- src/video_core/engines/maxwell_dma.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index e88290754..8fa359d0a 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -114,8 +114,6 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const u32 block_depth = src_params.block_size.depth; const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t src_layer_size = - CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); if (read_buffer.size() < src_size) { read_buffer.resize(src_size); -- cgit v1.2.3 From e1600b0962c78302b05d4b98d75245b980a03831 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 2 Oct 2020 21:24:22 -0300 Subject: video_core: Enforce -Wclass-memaccess --- src/video_core/engines/shader_header.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index 72e2a33d5..ceec05459 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -41,30 +41,30 @@ struct Header { BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; - } common0{}; + } common0; union { BitField<0, 24, u32> shader_local_memory_low_size; BitField<24, 8, u32> per_patch_attribute_count; - } common1{}; + } common1; union { BitField<0, 24, u32> shader_local_memory_high_size; BitField<24, 8, u32> threads_per_input_primitive; - } common2{}; + } common2; union { BitField<0, 24, u32> shader_local_memory_crs_size; BitField<24, 4, OutputTopology> output_topology; BitField<28, 4, u32> reserved; - } common3{}; + } common3; union { BitField<0, 12, u32> max_output_vertices; BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. BitField<20, 4, u32> reserved; BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4{}; + } common4; union { struct { @@ -145,7 +145,7 @@ struct Header { } } ps; - std::array raw{}; + std::array raw; }; u64 GetLocalMemorySize() const { @@ -153,7 +153,6 @@ struct Header { (common2.shader_local_memory_high_size << 24)); } }; - static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); } // namespace Tegra::Shader -- cgit v1.2.3 From 44b552be712a9db83a92710648cde4053adf876d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 28 Oct 2020 17:05:41 -0300 Subject: shader/arithmetic: Implement FCMP immediate + register variant Trivially add the encoding for this. --- src/video_core/engines/shader_bytecode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d374b73cf..a3c05d1b0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1893,6 +1893,7 @@ public: ICMP_IMM, FCMP_RR, FCMP_RC, + FCMP_IMMR, MUFU, // Multi-Function Operator RRO_C, // Range Reduction Operator RRO_R, @@ -2205,6 +2206,7 @@ private: INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), + INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), -- cgit v1.2.3 From 9ea8cffe35630f208024b4f547e9c261703edd06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 12 Jul 2020 05:03:05 -0300 Subject: maxwell_3d: Move code to separate functions Deduplicate some code and put it in separate functions so it's easier to understand and profile. --- src/video_core/engines/maxwell_3d.cpp | 267 +++++++++++++++------------------- src/video_core/engines/maxwell_3d.h | 8 + 2 files changed, 124 insertions(+), 151 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 57ebc785f..f7fa5fea7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -124,6 +124,114 @@ void Maxwell3D::InitializeRegisterDefaults() { mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; } +void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { + if (executing_macro == 0) { + // A macro call must begin by writing the macro method's register, not its argument. + ASSERT_MSG((method % 2) == 0, + "Can't start macro execution by writing to the ARGS register"); + executing_macro = method; + } + + for (std::size_t i = 0; i < amount; i++) { + macro_params.push_back(base_start[i]); + } + + // Call the macro when there are no more parameters in the command buffer + if (is_last_call) { + CallMacroMethod(executing_macro, macro_params); + macro_params.clear(); + } +} + +u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { + // Keep track of the register value in shadow_state when requested. + const auto control = shadow_state.shadow_ram_control; + if (control == Regs::ShadowRamControl::Track || + control == Regs::ShadowRamControl::TrackWithFilter) { + shadow_state.reg_array[method] = argument; + return argument; + } + if (control == Regs::ShadowRamControl::Replay) { + return shadow_state.reg_array[method]; + } + return argument; +} + +void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { + if (regs.reg_array[method] == argument) { + return; + } + regs.reg_array[method] = argument; + + for (const auto& table : dirty.tables) { + dirty.flags[table[method]] = true; + } +} + +void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, + bool is_last_call) { + switch (method) { + case MAXWELL3D_REG_INDEX(wait_for_idle): + return rasterizer->WaitForIdle(); + case MAXWELL3D_REG_INDEX(shadow_ram_control): + shadow_state.shadow_ram_control = static_cast(nonshadow_argument); + return; + case MAXWELL3D_REG_INDEX(macros.data): + return macro_engine->AddCode(regs.macros.upload_address, argument); + case MAXWELL3D_REG_INDEX(macros.bind): + return ProcessMacroBind(argument); + case MAXWELL3D_REG_INDEX(firmware[4]): + return ProcessFirmwareCall4(); + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): + return StartCBData(method); + case MAXWELL3D_REG_INDEX(cb_bind[0]): + return ProcessCBBind(0); + case MAXWELL3D_REG_INDEX(cb_bind[1]): + return ProcessCBBind(1); + case MAXWELL3D_REG_INDEX(cb_bind[2]): + return ProcessCBBind(2); + case MAXWELL3D_REG_INDEX(cb_bind[3]): + return ProcessCBBind(3); + case MAXWELL3D_REG_INDEX(cb_bind[4]): + return ProcessCBBind(4); + case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): + return DrawArrays(); + case MAXWELL3D_REG_INDEX(clear_buffers): + return ProcessClearBuffers(); + case MAXWELL3D_REG_INDEX(query.query_get): + return ProcessQueryGet(); + case MAXWELL3D_REG_INDEX(condition.mode): + return ProcessQueryCondition(); + case MAXWELL3D_REG_INDEX(counter_reset): + return ProcessCounterReset(); + case MAXWELL3D_REG_INDEX(sync_info): + return ProcessSyncPoint(); + case MAXWELL3D_REG_INDEX(exec_upload): + return upload_state.ProcessExec(regs.exec_upload.linear != 0); + case MAXWELL3D_REG_INDEX(data_upload): + upload_state.ProcessData(argument, is_last_call); + if (is_last_call) { + OnMemoryWrite(); + } + return; + } +} + void Maxwell3D::CallMacroMethod(u32 method, const std::vector& parameters) { // Reset the current macro. executing_macro = 0; @@ -157,142 +265,16 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { // Methods after 0xE00 are special, they're actually triggers for some microcode that was // uploaded to the GPU during initialization. if (method >= MacroRegistersStart) { - // We're trying to execute a macro - if (executing_macro == 0) { - // A macro call must begin by writing the macro method's register, not its argument. - ASSERT_MSG((method % 2) == 0, - "Can't start macro execution by writing to the ARGS register"); - executing_macro = method; - } - - macro_params.push_back(method_argument); - - // Call the macro when there are no more parameters in the command buffer - if (is_last_call) { - CallMacroMethod(executing_macro, macro_params); - macro_params.clear(); - } + ProcessMacro(method, &method_argument, 1, is_last_call); return; } ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); - u32 arg = method_argument; - // Keep track of the register value in shadow_state when requested. - if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || - shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { - shadow_state.reg_array[method] = arg; - } else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) { - arg = shadow_state.reg_array[method]; - } - - if (regs.reg_array[method] != arg) { - regs.reg_array[method] = arg; - - for (const auto& table : dirty.tables) { - dirty.flags[table[method]] = true; - } - } - - switch (method) { - case MAXWELL3D_REG_INDEX(wait_for_idle): { - rasterizer->WaitForIdle(); - break; - } - case MAXWELL3D_REG_INDEX(shadow_ram_control): { - shadow_state.shadow_ram_control = static_cast(method_argument); - break; - } - case MAXWELL3D_REG_INDEX(macros.data): { - macro_engine->AddCode(regs.macros.upload_address, arg); - break; - } - case MAXWELL3D_REG_INDEX(macros.bind): { - ProcessMacroBind(arg); - break; - } - case MAXWELL3D_REG_INDEX(firmware[4]): { - ProcessFirmwareCall4(); - break; - } - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { - StartCBData(method); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[0]): { - ProcessCBBind(0); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[1]): { - ProcessCBBind(1); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[2]): { - ProcessCBBind(2); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[3]): { - ProcessCBBind(3); - break; - } - case MAXWELL3D_REG_INDEX(cb_bind[4]): { - ProcessCBBind(4); - break; - } - case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): { - DrawArrays(); - break; - } - case MAXWELL3D_REG_INDEX(clear_buffers): { - ProcessClearBuffers(); - break; - } - case MAXWELL3D_REG_INDEX(query.query_get): { - ProcessQueryGet(); - break; - } - case MAXWELL3D_REG_INDEX(condition.mode): { - ProcessQueryCondition(); - break; - } - case MAXWELL3D_REG_INDEX(counter_reset): { - ProcessCounterReset(); - break; - } - case MAXWELL3D_REG_INDEX(sync_info): { - ProcessSyncPoint(); - break; - } - case MAXWELL3D_REG_INDEX(exec_upload): { - upload_state.ProcessExec(regs.exec_upload.linear != 0); - break; - } - case MAXWELL3D_REG_INDEX(data_upload): { - upload_state.ProcessData(arg, is_last_call); - if (is_last_call) { - OnMemoryWrite(); - } - break; - } - default: - break; - } + const u32 argument = ProcessShadowRam(method, method_argument); + ProcessDirtyRegisters(method, argument); + ProcessMethodCall(method, argument, method_argument, is_last_call); } void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, @@ -300,23 +282,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, // Methods after 0xE00 are special, they're actually triggers for some microcode that was // uploaded to the GPU during initialization. if (method >= MacroRegistersStart) { - // We're trying to execute a macro - if (executing_macro == 0) { - // A macro call must begin by writing the macro method's register, not its argument. - ASSERT_MSG((method % 2) == 0, - "Can't start macro execution by writing to the ARGS register"); - executing_macro = method; - } - - for (std::size_t i = 0; i < amount; i++) { - macro_params.push_back(base_start[i]); - } - - // Call the macro when there are no more parameters in the command buffer - if (amount == methods_pending) { - CallMacroMethod(executing_macro, macro_params); - macro_params.clear(); - } + ProcessMacro(method, base_start, amount, amount == methods_pending); return; } switch (method) { @@ -335,15 +301,14 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): - case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { + case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): ProcessCBMultiData(method, base_start, amount); break; - } - default: { + default: for (std::size_t i = 0; i < amount; i++) { CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); } - } + break; } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index bc289c55d..1cbe8fe67 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1461,6 +1461,14 @@ public: private: void InitializeRegisterDefaults(); + void ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call); + + u32 ProcessShadowRam(u32 method, u32 argument); + + void ProcessDirtyRegisters(u32 method, u32 argument); + + void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); + Core::System& system; MemoryManager& memory_manager; -- cgit v1.2.3 From 622830f4e16a8f0eabeb1b81dea5fca613402d8c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 12 Jul 2020 05:05:04 -0300 Subject: maxwell_3d: Use insert instead of loop push_back This reduces the overhead of bounds checking on each element. It won't reduce the cost of allocation because usually this vector's capacity is usually large enough to hold whatever we push to it. --- src/video_core/engines/maxwell_3d.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index f7fa5fea7..6287df633 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -132,9 +132,7 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool executing_macro = method; } - for (std::size_t i = 0; i < amount; i++) { - macro_params.push_back(base_start[i]); - } + macro_params.insert(macro_params.end(), base_start, base_start + amount); // Call the macro when there are no more parameters in the command buffer if (is_last_call) { -- cgit v1.2.3 From 56ecafc2045dbaa1887f70da94e9e6baad576e1f Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 20 Nov 2020 02:13:45 -0500 Subject: shader_bytecode: Eliminate variable shadowing --- src/video_core/engines/shader_bytecode.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index a3c05d1b0..1640207a7 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -32,7 +32,7 @@ struct Register { constexpr Register() = default; - constexpr Register(u64 value) : value(value) {} + constexpr Register(u64 value_) : value(value_) {} constexpr operator u64() const { return value; @@ -75,7 +75,7 @@ enum class AttributeSize : u64 { union Attribute { Attribute() = default; - constexpr explicit Attribute(u64 value) : value(value) {} + constexpr explicit Attribute(u64 value_) : value(value_) {} enum class Index : u64 { LayerViewportPointSize = 6, @@ -124,7 +124,7 @@ union Attribute { union Sampler { Sampler() = default; - constexpr explicit Sampler(u64 value) : value(value) {} + constexpr explicit Sampler(u64 value_) : value(value_) {} enum class Index : u64 { Sampler_0 = 8, @@ -137,7 +137,7 @@ union Sampler { union Image { Image() = default; - constexpr explicit Image(u64 value) : value{value} {} + constexpr explicit Image(u64 value_) : value{value_} {} BitField<36, 13, u64> index; u64 value; @@ -658,7 +658,7 @@ union Instruction { return *this; } - constexpr Instruction(u64 value) : value{value} {} + constexpr Instruction(u64 value_) : value{value_} {} constexpr Instruction(const Instruction& instr) : value(instr.value) {} constexpr bool Bit(u64 offset) const { @@ -1624,12 +1624,13 @@ union Instruction { s32 GetBranchTarget() const { // Sign extend the branch target offset - u32 mask = 1U << (24 - 1); - u32 value = static_cast(target); + const auto mask = 1U << (24 - 1); + const auto target_value = static_cast(target); + constexpr auto instruction_size = static_cast(sizeof(Instruction)); + // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. - return static_cast((value ^ mask) - mask) / static_cast(sizeof(Instruction)) + - 1; + return static_cast((target_value ^ mask) - mask) / instruction_size + 1; } } bra; @@ -1639,12 +1640,13 @@ union Instruction { s32 GetBranchExtend() const { // Sign extend the branch target offset - u32 mask = 1U << (24 - 1); - u32 value = static_cast(target); + const auto mask = 1U << (24 - 1); + const auto target_value = static_cast(target); + constexpr auto instruction_size = static_cast(sizeof(Instruction)); + // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. - return static_cast((value ^ mask) - mask) / static_cast(sizeof(Instruction)) + - 1; + return static_cast((target_value ^ mask) - mask) / instruction_size + 1; } } brx; @@ -2004,8 +2006,8 @@ public: class Matcher { public: - constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type) - : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} + constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) + : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} constexpr const char* GetName() const { return name; -- cgit v1.2.3 From b7cd5d742e125d17f5530bcb38fac40d1f41ea4d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 20 Nov 2020 02:20:34 -0500 Subject: shader_bytecode: Make use of [[nodiscard]] where applicable Ensures that all queried values are made use of. --- src/video_core/engines/shader_bytecode.h | 152 ++++++++++++++++--------------- 1 file changed, 79 insertions(+), 73 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 1640207a7..37d17efdc 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -34,29 +34,29 @@ struct Register { constexpr Register(u64 value_) : value(value_) {} - constexpr operator u64() const { + [[nodiscard]] constexpr operator u64() const { return value; } template - constexpr u64 operator-(const T& oth) const { + [[nodiscard]] constexpr u64 operator-(const T& oth) const { return value - oth; } template - constexpr u64 operator&(const T& oth) const { + [[nodiscard]] constexpr u64 operator&(const T& oth) const { return value & oth; } - constexpr u64 operator&(const Register& oth) const { + [[nodiscard]] constexpr u64 operator&(const Register& oth) const { return value & oth.value; } - constexpr u64 operator~() const { + [[nodiscard]] constexpr u64 operator~() const { return ~value; } - u64 GetSwizzledIndex(u64 elem) const { + [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { elem = (value + elem) & 3; return (value & ~3) + elem; } @@ -107,7 +107,7 @@ union Attribute { BitField<31, 1, u64> patch; BitField<47, 3, AttributeSize> size; - bool IsPhysical() const { + [[nodiscard]] bool IsPhysical() const { return patch == 0 && element == 0 && static_cast(index.Value()) == 0; } } fmt20; @@ -505,14 +505,14 @@ struct IpaMode { IpaInterpMode interpolation_mode; IpaSampleMode sampling_mode; - bool operator==(const IpaMode& a) const { + [[nodiscard]] bool operator==(const IpaMode& a) const { return std::tie(interpolation_mode, sampling_mode) == std::tie(a.interpolation_mode, a.sampling_mode); } - bool operator!=(const IpaMode& a) const { + [[nodiscard]] bool operator!=(const IpaMode& a) const { return !operator==(a); } - bool operator<(const IpaMode& a) const { + [[nodiscard]] bool operator<(const IpaMode& a) const { return std::tie(interpolation_mode, sampling_mode) < std::tie(a.interpolation_mode, a.sampling_mode); } @@ -661,7 +661,7 @@ union Instruction { constexpr Instruction(u64 value_) : value{value_} {} constexpr Instruction(const Instruction& instr) : value(instr.value) {} - constexpr bool Bit(u64 offset) const { + [[nodiscard]] constexpr bool Bit(u64 offset) const { return ((value >> offset) & 1) != 0; } @@ -746,34 +746,34 @@ union Instruction { BitField<28, 8, u64> imm_lut28; BitField<48, 8, u64> imm_lut48; - u32 GetImmLut28() const { + [[nodiscard]] u32 GetImmLut28() const { return static_cast(imm_lut28); } - u32 GetImmLut48() const { + [[nodiscard]] u32 GetImmLut48() const { return static_cast(imm_lut48); } } lop3; - u16 GetImm20_16() const { + [[nodiscard]] u16 GetImm20_16() const { return static_cast(imm20_16); } - u32 GetImm20_19() const { + [[nodiscard]] u32 GetImm20_19() const { u32 imm{static_cast(imm20_19)}; imm <<= 12; imm |= negate_imm ? 0x80000000 : 0; return imm; } - u32 GetImm20_32() const { + [[nodiscard]] u32 GetImm20_32() const { return static_cast(imm20_32); } - s32 GetSignedImm20_20() const { - u32 immediate = static_cast(imm20_19 | (negate_imm << 19)); + [[nodiscard]] s32 GetSignedImm20_20() const { + const auto immediate = static_cast(imm20_19 | (negate_imm << 19)); // Sign extend the 20-bit value. - u32 mask = 1U << (20 - 1); + const auto mask = 1U << (20 - 1); return static_cast((immediate ^ mask) - mask); } } alu; @@ -857,7 +857,7 @@ union Instruction { BitField<56, 1, u64> second_negate; BitField<30, 9, u64> second; - u32 PackImmediates() const { + [[nodiscard]] u32 PackImmediates() const { // Immediates are half floats shifted. constexpr u32 imm_shift = 6; return static_cast((first << imm_shift) | (second << (16 + imm_shift))); @@ -1033,7 +1033,7 @@ union Instruction { BitField<28, 2, AtomicType> type; BitField<30, 22, s64> offset; - s32 GetImmediateOffset() const { + [[nodiscard]] s32 GetImmediateOffset() const { return static_cast(offset << 2); } } atoms; @@ -1215,7 +1215,7 @@ union Instruction { BitField<39, 4, u64> rounding; // H0, H1 extract for F16 missing BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - F2fRoundingOp GetRoundingMode() const { + [[nodiscard]] F2fRoundingOp GetRoundingMode() const { constexpr u64 rounding_mask = 0x0B; return static_cast(rounding.Value() & rounding_mask); } @@ -1239,15 +1239,15 @@ union Instruction { BitField<54, 1, u64> aoffi_flag; BitField<55, 3, TextureProcessMode> process_mode; - bool IsComponentEnabled(std::size_t component) const { - return ((1ull << component) & component_mask) != 0; + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return dc_flag != 0; @@ -1271,15 +1271,15 @@ union Instruction { BitField<36, 1, u64> aoffi_flag; BitField<37, 3, TextureProcessMode> process_mode; - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { return ((1ULL << component) & component_mask) != 0; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return dc_flag != 0; @@ -1299,7 +1299,7 @@ union Instruction { BitField<31, 4, u64> component_mask; BitField<49, 1, u64> nodep_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NODEP: return nodep_flag != 0; @@ -1309,7 +1309,7 @@ union Instruction { return false; } - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { return ((1ULL << component) & component_mask) != 0; } } txq; @@ -1321,11 +1321,11 @@ union Instruction { BitField<35, 1, u64> ndv_flag; BitField<49, 1, u64> nodep_flag; - bool IsComponentEnabled(std::size_t component) const { - return ((1ull << component) & component_mask) != 0; + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return (ndv_flag != 0); @@ -1347,7 +1347,7 @@ union Instruction { BitField<54, 2, u64> offset_mode; BitField<56, 2, u64> component; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return ndv_flag != 0; @@ -1373,7 +1373,7 @@ union Instruction { BitField<33, 2, u64> offset_mode; BitField<37, 2, u64> component; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return ndv_flag != 0; @@ -1399,7 +1399,7 @@ union Instruction { BitField<52, 2, u64> component; BitField<55, 1, u64> fp16_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return dc_flag != 0; @@ -1422,16 +1422,20 @@ union Instruction { BitField<53, 4, u64> texture_info; BitField<59, 1, u64> fp32_flag; - TextureType GetTextureType() const { + [[nodiscard]] TextureType GetTextureType() const { // The TEXS instruction has a weird encoding for the texture type. - if (texture_info == 0) + if (texture_info == 0) { return TextureType::Texture1D; - if (texture_info >= 1 && texture_info <= 9) + } + if (texture_info >= 1 && texture_info <= 9) { return TextureType::Texture2D; - if (texture_info >= 10 && texture_info <= 11) + } + if (texture_info >= 10 && texture_info <= 11) { return TextureType::Texture3D; - if (texture_info >= 12 && texture_info <= 13) + } + if (texture_info >= 12 && texture_info <= 13) { return TextureType::TextureCube; + } LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", static_cast(texture_info.Value())); @@ -1439,7 +1443,7 @@ union Instruction { return TextureType::Texture1D; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { switch (texture_info) { case 0: case 2: @@ -1458,7 +1462,7 @@ union Instruction { return TextureProcessMode::None; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; @@ -1470,16 +1474,16 @@ union Instruction { return false; } - bool IsArrayTexture() const { + [[nodiscard]] bool IsArrayTexture() const { // TEXS only supports Texture2D arrays. return texture_info >= 7 && texture_info <= 9; } - bool HasTwoDestinations() const { + [[nodiscard]] bool HasTwoDestinations() const { return gpr28.Value() != Register::ZeroIndex; } - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { static constexpr std::array, 4> mask_lut{{ {}, {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, @@ -1506,7 +1510,7 @@ union Instruction { BitField<54, 1, u64> cl; BitField<55, 1, u64> process_mode; - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; } } tld; @@ -1516,7 +1520,7 @@ union Instruction { BitField<53, 4, u64> texture_info; BitField<59, 1, u64> fp32_flag; - TextureType GetTextureType() const { + [[nodiscard]] TextureType GetTextureType() const { // The TLDS instruction has a weird encoding for the texture type. if (texture_info <= 1) { return TextureType::Texture1D; @@ -1535,13 +1539,14 @@ union Instruction { return TextureType::Texture1D; } - TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { + if (texture_info == 1 || texture_info == 5 || texture_info == 12) { return TextureProcessMode::LL; + } return TextureProcessMode::LZ; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::AOFFI: return texture_info == 12 || texture_info == 4; @@ -1555,7 +1560,7 @@ union Instruction { return false; } - bool IsArrayTexture() const { + [[nodiscard]] bool IsArrayTexture() const { // TEXS only supports Texture2D arrays. return texture_info == 8; } @@ -1567,7 +1572,7 @@ union Instruction { BitField<35, 1, u64> aoffi_flag; BitField<49, 1, u64> nodep_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::AOFFI: return aoffi_flag != 0; @@ -1591,7 +1596,7 @@ union Instruction { BitField<20, 3, StoreType> store_data_layout; BitField<20, 4, u64> component_mask_selector; - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { ASSERT(mode == SurfaceDataMode::P); constexpr u8 R = 0b0001; constexpr u8 G = 0b0010; @@ -1604,7 +1609,7 @@ union Instruction { return std::bitset<4>{mask.at(component_mask_selector)}.test(component); } - StoreType GetStoreDataLayout() const { + [[nodiscard]] StoreType GetStoreDataLayout() const { ASSERT(mode == SurfaceDataMode::D_BA); return store_data_layout; } @@ -1622,7 +1627,7 @@ union Instruction { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; - s32 GetBranchTarget() const { + [[nodiscard]] s32 GetBranchTarget() const { // Sign extend the branch target offset const auto mask = 1U << (24 - 1); const auto target_value = static_cast(target); @@ -1638,7 +1643,7 @@ union Instruction { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; - s32 GetBranchExtend() const { + [[nodiscard]] s32 GetBranchExtend() const { // Sign extend the branch target offset const auto mask = 1U << (24 - 1); const auto target_value = static_cast(target); @@ -1699,7 +1704,7 @@ union Instruction { BitField<50, 1, u64> is_op_b_register; BitField<51, 3, VmnmxOperation> operation; - VmnmxType SourceFormatA() const { + [[nodiscard]] VmnmxType SourceFormatA() const { switch (src_format_a) { case 0b11: return VmnmxType::Bits32; @@ -1710,7 +1715,7 @@ union Instruction { } } - VmnmxType SourceFormatB() const { + [[nodiscard]] VmnmxType SourceFormatB() const { switch (src_format_b) { case 0b11: return VmnmxType::Bits32; @@ -1741,7 +1746,7 @@ union Instruction { BitField<20, 14, u64> shifted_offset; BitField<34, 5, u64> index; - u64 GetOffset() const { + [[nodiscard]] u64 GetOffset() const { return shifted_offset * 4; } } cbuf34; @@ -1750,7 +1755,7 @@ union Instruction { BitField<20, 16, s64> offset; BitField<36, 5, u64> index; - s64 GetOffset() const { + [[nodiscard]] s64 GetOffset() const { return offset; } } cbuf36; @@ -1999,7 +2004,7 @@ public: /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be /// conditionally executed). - static bool IsPredicatedInstruction(Id opcode) { + [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { // TODO(Subv): Add the rest of unpredicated instructions. return opcode != Id::SSY && opcode != Id::PBK; } @@ -2009,19 +2014,19 @@ public: constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - constexpr const char* GetName() const { + [[nodiscard]] constexpr const char* GetName() const { return name; } - constexpr u16 GetMask() const { + [[nodiscard]] constexpr u16 GetMask() const { return mask; } - constexpr Id GetId() const { + [[nodiscard]] constexpr Id GetId() const { return id; } - constexpr Type GetType() const { + [[nodiscard]] constexpr Type GetType() const { return type; } @@ -2030,7 +2035,7 @@ public: * @param instruction The instruction to test * @returns true if the given instruction matches. */ - constexpr bool Matches(u16 instruction) const { + [[nodiscard]] constexpr bool Matches(u16 instruction) const { return (instruction & mask) == expected; } @@ -2042,7 +2047,8 @@ public: Type type; }; - static std::optional> Decode(Instruction instr) { + using DecodeResult = std::optional>; + [[nodiscard]] static DecodeResult Decode(Instruction instr) { static const auto table{GetDecodeTable()}; const auto matches_instruction = [instr](const auto& matcher) { @@ -2064,7 +2070,7 @@ private: * A '0' in a bitstring indicates that a zero must be present at that bit position. * A '1' in a bitstring indicates that a one must be present at that bit position. */ - static constexpr auto GetMaskAndExpect(const char* const bitstring) { + [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { u16 mask = 0, expect = 0; for (std::size_t i = 0; i < opcode_bitsize; i++) { const std::size_t bit_position = opcode_bitsize - i - 1; @@ -2086,14 +2092,14 @@ private: public: /// Creates a matcher that can match and parse instructions based on bitstring. - static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type, - const char* const name) { + [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, + Type type, const char* const name) { const auto [mask, expected] = GetMaskAndExpect(bitstring); return Matcher(name, mask, expected, op, type); } }; - static std::vector GetDecodeTable() { + [[nodiscard]] static std::vector GetDecodeTable() { std::vector table = { #define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) INST("111000110011----", Id::KIL, Type::Flow, "KIL"), -- cgit v1.2.3 From 2ccf85a9103afbb4dc227e481bb0e3a7360e833b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 26 Nov 2020 16:49:20 -0300 Subject: vk_shader_decompiler: Implement force early fragment tests Force early fragment tests when the 3D method is enabled. The established pipeline cache takes care of recompiling if needed. This is implemented only on Vulkan to avoid invalidating the shader cache on OpenGL. --- src/video_core/engines/maxwell_3d.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1cbe8fe67..b0d9559d0 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -755,7 +755,11 @@ public: u32 data_upload; - INSERT_UNION_PADDING_WORDS(0x44); + INSERT_UNION_PADDING_WORDS(0x16); + + u32 force_early_fragment_tests; + + INSERT_UNION_PADDING_WORDS(0x2D); struct { union { @@ -1572,6 +1576,7 @@ ASSERT_REG_POSITION(shadow_ram_control, 0x49); ASSERT_REG_POSITION(upload, 0x60); ASSERT_REG_POSITION(exec_upload, 0x6C); ASSERT_REG_POSITION(data_upload, 0x6D); +ASSERT_REG_POSITION(force_early_fragment_tests, 0x84); ASSERT_REG_POSITION(sync_info, 0xB2); ASSERT_REG_POSITION(tess_mode, 0xC8); ASSERT_REG_POSITION(tess_level_outer, 0xC9); -- cgit v1.2.3 From 677a8b208d47d0d2397197ce74c7039a8ea79d20 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 4 Dec 2020 14:39:12 -0500 Subject: video_core: Resolve more variable shadowing scenarios Resolves variable shadowing scenarios up to the end of the OpenGL code to make it nicer to review. The rest will be resolved in a following commit. --- src/video_core/engines/engine_upload.cpp | 8 ++++---- src/video_core/engines/engine_upload.h | 4 ++-- src/video_core/engines/kepler_memory.cpp | 4 ++-- src/video_core/engines/kepler_memory.h | 2 +- src/video_core/engines/maxwell_dma.cpp | 6 ++++-- src/video_core/engines/maxwell_dma.h | 4 ++-- 6 files changed, 15 insertions(+), 13 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index d44ad0cd8..71d7e1473 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -11,16 +11,16 @@ namespace Tegra::Engines::Upload { -State::State(MemoryManager& memory_manager, Registers& regs) - : regs{regs}, memory_manager{memory_manager} {} +State::State(MemoryManager& memory_manager_, Registers& regs_) + : regs{regs_}, memory_manager{memory_manager_} {} State::~State() = default; -void State::ProcessExec(const bool is_linear) { +void State::ProcessExec(const bool is_linear_) { write_offset = 0; copy_size = regs.line_length_in * regs.line_count; inner_buffer.resize(copy_size); - this->is_linear = is_linear; + is_linear = is_linear_; } void State::ProcessData(const u32 data, const bool is_last_call) { diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 462da419e..1c7f1effa 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -54,10 +54,10 @@ struct Registers { class State { public: - State(MemoryManager& memory_manager, Registers& regs); + explicit State(MemoryManager& memory_manager_, Registers& regs_); ~State(); - void ProcessExec(bool is_linear); + void ProcessExec(bool is_linear_); void ProcessData(u32 data, bool is_last_call); private: diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index dc71b2eec..9911140e9 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -14,8 +14,8 @@ namespace Tegra::Engines { -KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) - : system{system}, upload_state{memory_manager, regs.upload} {} +KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager) + : system{system_}, upload_state{memory_manager, regs.upload} {} KeplerMemory::~KeplerMemory() = default; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5b7f71a00..62483589e 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -35,7 +35,7 @@ namespace Tegra::Engines { class KeplerMemory final : public EngineInterface { public: - KeplerMemory(Core::System& system, MemoryManager& memory_manager); + explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager); ~KeplerMemory(); /// Write the value to the register identified by method. diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 8fa359d0a..1c29e895e 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -16,8 +16,10 @@ namespace Tegra::Engines { using namespace Texture; -MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) - : system{system}, memory_manager{memory_manager} {} +MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_} {} + +MaxwellDMA::~MaxwellDMA() = default; void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 50f445efc..17bd280c4 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -185,8 +185,8 @@ public: }; static_assert(sizeof(RemapConst) == 12); - explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); - ~MaxwellDMA() = default; + explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); + ~MaxwellDMA(); /// Write the value to the register identified by method. void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; -- cgit v1.2.3 From a6e6cd5788dac6dc26101f4162d7704ca5c274e8 Mon Sep 17 00:00:00 2001 From: comex Date: Mon, 31 Aug 2020 10:22:03 -0400 Subject: maxwell_dma: Rename RenderEnable::Mode::FALSE and TRUE to avoid name conflict On Apple platforms, FALSE and TRUE are defined as macros by , which is included by various system headers. Note that there appear to be no actual users of the names to fix up. --- src/video_core/engines/maxwell_dma.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 17bd280c4..3c59eeb13 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -72,11 +72,13 @@ public: struct RenderEnable { enum class Mode : u32 { - FALSE = 0, - TRUE = 1, - CONDITIONAL = 2, - RENDER_IF_EQUAL = 3, - RENDER_IF_NOT_EQUAL = 4, + // Note: This uses Pascal case in order to avoid the identifiers + // FALSE and TRUE, which are reserved on Darwin. + False = 0, + True = 1, + Conditional = 2, + RenderIfEqual = 3, + RenderIfNotEqual = 4, }; PackedGPUVAddr address; -- cgit v1.2.3 From bcc5c4403ab8a42adcf65f98f70a4f6d0ca94a02 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 6 Dec 2020 20:46:53 -0500 Subject: maxwell_3d: Remove unused dirty_pointer array This is unused and removing it shrinks the structure by 3584 bytes. --- src/video_core/engines/maxwell_3d.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index b0d9559d0..d9b53cb67 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1504,8 +1504,6 @@ private: bool execute_on{true}; - std::array dirty_pointers{}; - /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; -- cgit v1.2.3 From ce0712bf95ec7e261f42108214ebf3f27cd32589 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 6 Dec 2020 20:48:10 -0500 Subject: maxwell_3d: Resolve -Wdocumentation warning Removes a documentation comment for a non-existent member. --- src/video_core/engines/maxwell_3d.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d9b53cb67..71afa3ed3 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1512,8 +1512,8 @@ private: /** * Call a macro on this engine. + * * @param method Method to call - * @param num_parameters Number of arguments * @param parameters Arguments to the method call */ void CallMacroMethod(u32 method, const std::vector& parameters); -- cgit v1.2.3 From 9e7a1f13516eb0bd5447e5758a2f7227a57f9bd9 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 6 Dec 2020 20:51:03 -0500 Subject: maxwell_3d: Move member variables to end of class Follows our established coding style. --- src/video_core/engines/maxwell_3d.h | 63 +++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 31 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 71afa3ed3..564acbc53 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1473,37 +1473,6 @@ private: void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); - Core::System& system; - MemoryManager& memory_manager; - - VideoCore::RasterizerInterface* rasterizer = nullptr; - - /// Start offsets of each macro in macro_memory - std::array macro_positions = {}; - - std::array mme_inline{}; - - /// Macro method that is currently being executed / being fed parameters. - u32 executing_macro = 0; - /// Parameters that have been submitted to the macro call so far. - std::vector macro_params; - - /// Interpreter for the macro codes uploaded to the GPU. - std::unique_ptr macro_engine; - - static constexpr u32 null_cb_data = 0xFFFFFFFF; - struct { - std::array, 16> buffer; - u32 current{null_cb_data}; - u32 id{null_cb_data}; - u32 start_pos{}; - u32 counter{}; - } cb_data_state; - - Upload::State upload_state; - - bool execute_on{true}; - /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -1562,6 +1531,38 @@ private: /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); + + Core::System& system; + MemoryManager& memory_manager; + + VideoCore::RasterizerInterface* rasterizer = nullptr; + + /// Start offsets of each macro in macro_memory + std::array macro_positions{}; + + std::array mme_inline{}; + + /// Macro method that is currently being executed / being fed parameters. + u32 executing_macro = 0; + /// Parameters that have been submitted to the macro call so far. + std::vector macro_params; + + /// Interpreter for the macro codes uploaded to the GPU. + std::unique_ptr macro_engine; + + static constexpr u32 null_cb_data = 0xFFFFFFFF; + struct CBDataState { + std::array, 16> buffer; + u32 current{null_cb_data}; + u32 id{null_cb_data}; + u32 start_pos{}; + u32 counter{}; + }; + CBDataState cb_data_state; + + Upload::State upload_state; + + bool execute_on{true}; }; #define ASSERT_REG_POSITION(field_name, position) \ -- cgit v1.2.3 From 4c5f5c9bf301d3626df104dbed6fed6f115cedc8 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 7 Dec 2020 00:41:47 -0500 Subject: video_core: Remove unnecessary enum class casting in logging messages fmt now automatically prints the numeric value of an enum class member by default, so we don't need to use casts any more. Reduces the line noise a bit. --- src/video_core/engines/fermi_2d.cpp | 3 +-- src/video_core/engines/maxwell_3d.cpp | 11 +++++------ src/video_core/engines/shader_bytecode.h | 6 ++---- 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 9409c4075..4293d676c 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -48,8 +48,7 @@ static std::pair DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_ } void Fermi2D::HandleSurfaceCopy() { - LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", - static_cast(regs.operation)); + LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation); // TODO(Subv): Only raw copies are implemented. ASSERT(regs.operation == Operation::SrcCopy); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 6287df633..761962ed0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -359,7 +359,7 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { } void Maxwell3D::FlushMMEInlineDraw() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); @@ -504,8 +504,7 @@ void Maxwell3D::ProcessCounterReset() { rasterizer->ResetCounter(QueryType::SamplesPassed); break; default: - LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", - static_cast(regs.counter_reset)); + LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset); break; } } @@ -520,7 +519,7 @@ void Maxwell3D::ProcessSyncPoint() { } void Maxwell3D::DrawArrays() { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); @@ -558,12 +557,12 @@ std::optional Maxwell3D::GetQueryResult() { return 0; case Regs::QuerySelect::SamplesPassed: // Deferred. - rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed, system.GPU().GetTicks()); return std::nullopt; default: LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", - static_cast(regs.query.query_get.select.Value())); + regs.query.query_get.select.Value()); return 1; } } diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 37d17efdc..8b45f1b62 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1437,8 +1437,7 @@ union Instruction { return TextureType::TextureCube; } - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", - static_cast(texture_info.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); UNREACHABLE(); return TextureType::Texture1D; } @@ -1533,8 +1532,7 @@ union Instruction { return TextureType::Texture3D; } - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", - static_cast(texture_info.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); UNREACHABLE(); return TextureType::Texture1D; } -- cgit v1.2.3 From 9764c13d6d2977903f407761b27d847c0056e1c4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 02:25:23 -0300 Subject: video_core: Rewrite the texture cache The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage.The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues. --- src/video_core/engines/fermi_2d.cpp | 89 +++----- src/video_core/engines/fermi_2d.h | 331 ++++++++++++++++++++++-------- src/video_core/engines/kepler_compute.cpp | 26 +-- src/video_core/engines/kepler_compute.h | 5 - src/video_core/engines/maxwell_3d.cpp | 45 ++-- src/video_core/engines/maxwell_3d.h | 127 +++++++----- src/video_core/engines/maxwell_dma.cpp | 3 + 7 files changed, 377 insertions(+), 249 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 4293d676c..a01d334ad 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,7 +10,11 @@ namespace Tegra::Engines { -Fermi2D::Fermi2D() = default; +Fermi2D::Fermi2D() { + // Nvidia's OpenGL driver seems to assume these values + regs.src.depth = 1; + regs.dst.depth = 1; +} Fermi2D::~Fermi2D() = default; @@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Fermi2D register, increase the size of the Regs structure"); - regs.reg_array[method] = method_argument; - switch (method) { - // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, - // so trigger on the second 32-bit write. - case FERMI2D_REG_INDEX(blit_src_y) + 1: { - HandleSurfaceCopy(); - break; - } + if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) { + Blit(); } } void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { - for (std::size_t i = 0; i < amount; i++) { - CallMethod(method, base_start[i], methods_pending - static_cast(i) <= 1); + for (u32 i = 0; i < amount; ++i) { + CallMethod(method, base_start[i], methods_pending - i <= 1); } } -static std::pair DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { - const u32 line_a = src_2 - src_1; - const u32 line_b = dst_2 - dst_1; - const u32 excess = std::max(0, line_a - src_line + src_1); - return {line_b - (excess * line_b) / line_a, excess}; -} - -void Fermi2D::HandleSurfaceCopy() { - LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation); +void Fermi2D::Blit() { + LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", + regs.src.Address(), regs.dst.Address()); - // TODO(Subv): Only raw copies are implemented. - ASSERT(regs.operation == Operation::SrcCopy); + UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy"); + UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero"); + UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero"); + UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one"); + UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); - const u32 src_blit_x1{static_cast(regs.blit_src_x >> 32)}; - const u32 src_blit_y1{static_cast(regs.blit_src_y >> 32)}; - u32 src_blit_x2, src_blit_y2; - if (regs.blit_control.origin == Origin::Corner) { - src_blit_x2 = - static_cast((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); - src_blit_y2 = - static_cast((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); - } else { - src_blit_x2 = static_cast((regs.blit_src_x >> 32) + regs.blit_dst_width); - src_blit_y2 = static_cast((regs.blit_src_y >> 32) + regs.blit_dst_height); - } - u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width; - u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height; - const auto [new_dst_w, src_excess_x] = - DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width); - const auto [new_dst_h, src_excess_y] = - DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height); - dst_blit_x2 = new_dst_w + regs.blit_dst_x; - src_blit_x2 = src_blit_x2 - src_excess_x; - dst_blit_y2 = new_dst_h + regs.blit_dst_y; - src_blit_y2 = src_blit_y2 - src_excess_y; - const auto [new_src_w, dst_excess_x] = - DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width); - const auto [new_src_h, dst_excess_y] = - DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height); - src_blit_x2 = new_src_w + src_blit_x1; - dst_blit_x2 = dst_blit_x2 - dst_excess_x; - src_blit_y2 = new_src_h + src_blit_y1; - dst_blit_y2 = dst_blit_y2 - dst_excess_y; - const Common::Rectangle src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; - const Common::Rectangle dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2, - dst_blit_y2}; - const Config copy_config{ + const auto& args = regs.pixels_from_memory; + const Config config{ .operation = regs.operation, - .filter = regs.blit_control.filter, - .src_rect = src_rect, - .dst_rect = dst_rect, + .filter = args.sample_mode.filter, + .dst_x0 = args.dst_x0, + .dst_y0 = args.dst_y0, + .dst_x1 = args.dst_x0 + args.dst_width, + .dst_y1 = args.dst_y0 + args.dst_height, + .src_x0 = static_cast(args.src_x0 >> 32), + .src_y0 = static_cast(args.src_y0 >> 32), + .src_x1 = static_cast((args.du_dx * args.dst_width + args.src_x0) >> 32), + .src_y1 = static_cast((args.dv_dy * args.dst_height + args.src_y0) >> 32), }; - if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { + if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0909709ec..81522988e 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -53,8 +53,8 @@ public: }; enum class Filter : u32 { - PointSample = 0, // Nearest - Linear = 1, + Point = 0, + Bilinear = 1, }; enum class Operation : u32 { @@ -67,88 +67,235 @@ public: BlendPremult = 6, }; - struct Regs { - static constexpr std::size_t NUM_REGS = 0x258; + enum class MemoryLayout : u32 { + BlockLinear = 0, + Pitch = 1, + }; - struct Surface { - RenderTargetFormat format; - BitField<0, 1, u32> linear; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - }; - u32 depth; - u32 layer; - u32 pitch; - u32 width; - u32 height; - u32 address_high; - u32 address_low; - - GPUVAddr Address() const { - return static_cast((static_cast(address_high) << 32) | - address_low); - } - - u32 BlockWidth() const { - return block_width.Value(); - } - - u32 BlockHeight() const { - return block_height.Value(); - } - - u32 BlockDepth() const { - return block_depth.Value(); - } - }; - static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); + enum class CpuIndexWrap : u32 { + Wrap = 0, + NoWrap = 1, + }; + struct Surface { + RenderTargetFormat format; + MemoryLayout linear; union { - struct { - INSERT_UNION_PADDING_WORDS(0x80); + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + }; + u32 depth; + u32 layer; + u32 pitch; + u32 width; + u32 height; + u32 addr_upper; + u32 addr_lower; + + [[nodiscard]] constexpr GPUVAddr Address() const noexcept { + return (static_cast(addr_upper) << 32) | static_cast(addr_lower); + } + }; + static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); - Surface dst; + enum class SectorPromotion : u32 { + NoPromotion = 0, + PromoteTo2V = 1, + PromoteTo2H = 2, + PromoteTo4 = 3, + }; + + enum class NumTpcs : u32 { + All = 0, + One = 1, + }; - INSERT_UNION_PADDING_WORDS(2); + enum class RenderEnableMode : u32 { + False = 0, + True = 1, + Conditional = 2, + RenderIfEqual = 3, + RenderIfNotEqual = 4, + }; - Surface src; + enum class ColorKeyFormat : u32 { + A16R56G6B5 = 0, + A1R5G55B5 = 1, + A8R8G8B8 = 2, + A2R10G10B10 = 3, + Y8 = 4, + Y16 = 5, + Y32 = 6, + }; - INSERT_UNION_PADDING_WORDS(0x15); + union Beta4 { + BitField<0, 8, u32> b; + BitField<8, 8, u32> g; + BitField<16, 8, u32> r; + BitField<24, 8, u32> a; + }; - Operation operation; + struct Point { + u32 x; + u32 y; + }; - INSERT_UNION_PADDING_WORDS(0x177); + enum class PatternSelect : u32 { + MonoChrome8x8 = 0, + MonoChrome64x1 = 1, + MonoChrome1x64 = 2, + Color = 3, + }; + enum class NotifyType : u32 { + WriteOnly = 0, + WriteThenAwaken = 1, + }; + + enum class MonochromePatternColorFormat : u32 { + A8X8R8G6B5 = 0, + A1R5G5B5 = 1, + A8R8G8B8 = 2, + A8Y8 = 3, + A8X8Y16 = 4, + Y32 = 5, + }; + + enum class MonochromePatternFormat : u32 { + CGA6_M1 = 0, + LE_M1 = 1, + }; + + union Regs { + static constexpr std::size_t NUM_REGS = 0x258; + struct { + u32 object; + INSERT_UNION_PADDING_WORDS(0x3F); + u32 no_operation; + NotifyType notify; + INSERT_UNION_PADDING_WORDS(0x2); + u32 wait_for_idle; + INSERT_UNION_PADDING_WORDS(0xB); + u32 pm_trigger; + INSERT_UNION_PADDING_WORDS(0xF); + u32 context_dma_notify; + u32 dst_context_dma; + u32 src_context_dma; + u32 semaphore_context_dma; + INSERT_UNION_PADDING_WORDS(0x1C); + Surface dst; + CpuIndexWrap pixels_from_cpu_index_wrap; + u32 kind2d_check_enable; + Surface src; + SectorPromotion pixels_from_memory_sector_promotion; + INSERT_UNION_PADDING_WORDS(0x1); + NumTpcs num_tpcs; + u32 render_enable_addr_upper; + u32 render_enable_addr_lower; + RenderEnableMode render_enable_mode; + INSERT_UNION_PADDING_WORDS(0x4); + u32 clip_x0; + u32 clip_y0; + u32 clip_width; + u32 clip_height; + BitField<0, 1, u32> clip_enable; + BitField<0, 3, ColorKeyFormat> color_key_format; + u32 color_key; + BitField<0, 1, u32> color_key_enable; + BitField<0, 8, u32> rop; + u32 beta1; + Beta4 beta4; + Operation operation; + union { + BitField<0, 6, u32> x; + BitField<8, 6, u32> y; + } pattern_offset; + BitField<0, 2, PatternSelect> pattern_select; + INSERT_UNION_PADDING_WORDS(0xC); + struct { + BitField<0, 3, MonochromePatternColorFormat> color_format; + BitField<0, 1, MonochromePatternFormat> format; + u32 color0; + u32 color1; + u32 pattern0; + u32 pattern1; + } monochrome_pattern; + struct { + std::array X8R8G8B8; + std::array R5G6B5; + std::array X1R5G5B5; + std::array Y8; + } color_pattern; + INSERT_UNION_PADDING_WORDS(0x10); + struct { + u32 prim_mode; + u32 prim_color_format; + u32 prim_color; + u32 line_tie_break_bits; + INSERT_UNION_PADDING_WORDS(0x14); + u32 prim_point_xy; + INSERT_UNION_PADDING_WORDS(0x7); + std::array prim_point; + } render_solid; + struct { + u32 data_type; + u32 color_format; + u32 index_format; + u32 mono_format; + u32 wrap; + u32 color0; + u32 color1; + u32 mono_opacity; + INSERT_UNION_PADDING_WORDS(0x6); + u32 src_width; + u32 src_height; + u32 dx_du_frac; + u32 dx_du_int; + u32 dx_dv_frac; + u32 dy_dv_int; + u32 dst_x0_frac; + u32 dst_x0_int; + u32 dst_y0_frac; + u32 dst_y0_int; + u32 data; + } pixels_from_cpu; + INSERT_UNION_PADDING_WORDS(0x3); + u32 big_endian_control; + INSERT_UNION_PADDING_WORDS(0x3); + struct { + BitField<0, 3, u32> block_shape; + BitField<0, 5, u32> corral_size; + BitField<0, 1, u32> safe_overlap; union { - u32 raw; BitField<0, 1, Origin> origin; BitField<4, 1, Filter> filter; - } blit_control; - + } sample_mode; INSERT_UNION_PADDING_WORDS(0x8); - - u32 blit_dst_x; - u32 blit_dst_y; - u32 blit_dst_width; - u32 blit_dst_height; - u64 blit_du_dx; - u64 blit_dv_dy; - u64 blit_src_x; - u64 blit_src_y; - - INSERT_UNION_PADDING_WORDS(0x21); - }; - std::array reg_array; + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 du_dx; + s64 dv_dy; + s64 src_x0; + s64 src_y0; + } pixels_from_memory; }; + std::array reg_array; } regs{}; struct Config { - Operation operation{}; - Filter filter{}; - Common::Rectangle src_rect; - Common::Rectangle dst_rect; + Operation operation; + Filter filter; + s32 dst_x0; + s32 dst_y0; + s32 dst_x1; + s32 dst_y1; + s32 src_x0; + s32 src_y0; + s32 src_x1; + s32 src_y1; }; private: @@ -156,25 +303,49 @@ private: /// Performs the copy from the source surface to the destination surface as configured in the /// registers. - void HandleSurfaceCopy(); + void Blit(); }; #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ + static_assert(offsetof(Fermi2D::Regs, field_name) == position, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(dst, 0x80); -ASSERT_REG_POSITION(src, 0x8C); -ASSERT_REG_POSITION(operation, 0xAB); -ASSERT_REG_POSITION(blit_control, 0x223); -ASSERT_REG_POSITION(blit_dst_x, 0x22c); -ASSERT_REG_POSITION(blit_dst_y, 0x22d); -ASSERT_REG_POSITION(blit_dst_width, 0x22e); -ASSERT_REG_POSITION(blit_dst_height, 0x22f); -ASSERT_REG_POSITION(blit_du_dx, 0x230); -ASSERT_REG_POSITION(blit_dv_dy, 0x232); -ASSERT_REG_POSITION(blit_src_x, 0x234); -ASSERT_REG_POSITION(blit_src_y, 0x236); +ASSERT_REG_POSITION(object, 0x0); +ASSERT_REG_POSITION(no_operation, 0x100); +ASSERT_REG_POSITION(notify, 0x104); +ASSERT_REG_POSITION(wait_for_idle, 0x110); +ASSERT_REG_POSITION(pm_trigger, 0x140); +ASSERT_REG_POSITION(context_dma_notify, 0x180); +ASSERT_REG_POSITION(dst_context_dma, 0x184); +ASSERT_REG_POSITION(src_context_dma, 0x188); +ASSERT_REG_POSITION(semaphore_context_dma, 0x18C); +ASSERT_REG_POSITION(dst, 0x200); +ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228); +ASSERT_REG_POSITION(kind2d_check_enable, 0x22C); +ASSERT_REG_POSITION(src, 0x230); +ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258); +ASSERT_REG_POSITION(num_tpcs, 0x260); +ASSERT_REG_POSITION(render_enable_addr_upper, 0x264); +ASSERT_REG_POSITION(render_enable_addr_lower, 0x268); +ASSERT_REG_POSITION(clip_x0, 0x280); +ASSERT_REG_POSITION(clip_y0, 0x284); +ASSERT_REG_POSITION(clip_width, 0x288); +ASSERT_REG_POSITION(clip_height, 0x28c); +ASSERT_REG_POSITION(clip_enable, 0x290); +ASSERT_REG_POSITION(color_key_format, 0x294); +ASSERT_REG_POSITION(color_key, 0x298); +ASSERT_REG_POSITION(rop, 0x2A0); +ASSERT_REG_POSITION(beta1, 0x2A4); +ASSERT_REG_POSITION(beta4, 0x2A8); +ASSERT_REG_POSITION(operation, 0x2AC); +ASSERT_REG_POSITION(pattern_offset, 0x2B0); +ASSERT_REG_POSITION(pattern_select, 0x2B4); +ASSERT_REG_POSITION(monochrome_pattern, 0x2E8); +ASSERT_REG_POSITION(color_pattern, 0x300); +ASSERT_REG_POSITION(render_solid, 0x580); +ASSERT_REG_POSITION(pixels_from_cpu, 0x800); +ASSERT_REG_POSITION(big_endian_control, 0x870); +ASSERT_REG_POSITION(pixels_from_memory, 0x880); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 898370739..ba387506e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { - const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); - ASSERT(cbuf_mask[regs.tex_cb_index]); - - const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index]; - ASSERT(texinfo.Address() != 0); - - const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle); - ASSERT(address < texinfo.Address() + texinfo.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read(address)}; - return GetTextureInfo(tex_handle); -} - -Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { - return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; @@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { const Texture::TextureHandle tex_handle{handle}; - const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); - SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); - result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); + const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + + SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); + result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); return result; } diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7f2500aab..51a041202 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -209,11 +209,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - Texture::FullTextureInfo GetTexture(std::size_t offset) const; - - /// Given a texture handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 761962ed0..9be651e24 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include #include #include "common/assert.h" @@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume OnMemoryWrite(); } return; + case MAXWELL3D_REG_INDEX(fragment_barrier): + return rasterizer->FragmentBarrier(); + case MAXWELL3D_REG_INDEX(tiled_cache_barrier): + return rasterizer->TiledCacheBarrier(); } } @@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() { } Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { - const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; + const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)}; Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); @@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { } Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { - const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; + const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)}; Texture::TSCEntry tsc_entry; memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } -Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { - return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; -} - -Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const { - const auto stage_index = static_cast(stage); - const auto& shader = state.shader_stages[stage_index]; - const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - const GPUVAddr tex_info_address = - tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - - ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read(tex_info_address)}; - - return GetTextureInfo(tex_handle); -} - u32 Maxwell3D::GetRegisterValue(u32 method) const { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); return regs.reg_array[method]; } void Maxwell3D::ProcessClearBuffers() { - ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && - regs.clear_buffers.R == regs.clear_buffers.B && - regs.clear_buffers.R == regs.clear_buffers.A); - rasterizer->Clear(); } @@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); - return result; + return memory_manager.Read(buffer.address + offset); } SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { @@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { const Texture::TextureHandle tex_handle{handle}; - const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); - SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); - result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); + const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); + + SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); + result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); return result; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 564acbc53..bf9e07c9b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -438,16 +438,6 @@ public: DecrWrapOGL = 0x8508, }; - enum class MemoryLayout : u32 { - Linear = 0, - BlockLinear = 1, - }; - - enum class InvMemoryLayout : u32 { - BlockLinear = 0, - Linear = 1, - }; - enum class CounterReset : u32 { SampleCnt = 0x01, Unk02 = 0x02, @@ -589,21 +579,31 @@ public: NegativeW = 7, }; + enum class SamplerIndex : u32 { + Independently = 0, + ViaHeaderIndex = 1, + }; + + struct TileMode { + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + BitField<12, 1, u32> is_pitch_linear; + BitField<16, 1, u32> is_3d; + }; + }; + static_assert(sizeof(TileMode) == 4); + struct RenderTargetConfig { u32 address_high; u32 address_low; u32 width; u32 height; Tegra::RenderTargetFormat format; + TileMode tile_mode; union { - BitField<0, 3, u32> block_width; - BitField<4, 3, u32> block_height; - BitField<8, 3, u32> block_depth; - BitField<12, 1, InvMemoryLayout> type; - BitField<16, 1, u32> is_3d; - } memory_layout; - union { - BitField<0, 16, u32> layers; + BitField<0, 16, u32> depth; BitField<16, 1, u32> volume; }; u32 layer_stride; @@ -832,7 +832,11 @@ public: u32 patch_vertices; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x4); + + u32 fragment_barrier; + + INSERT_UNION_PADDING_WORDS(0x7); std::array scissor_test; @@ -842,7 +846,15 @@ public: u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x5); + + u32 invalidate_texture_data_cache; + + INSERT_UNION_PADDING_WORDS(0x1); + + u32 tiled_cache_barrier; + + INSERT_UNION_PADDING_WORDS(0x4); u32 color_mask_common; @@ -866,12 +878,7 @@ public: u32 address_high; u32 address_low; Tegra::DepthFormat format; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - BitField<20, 1, InvMemoryLayout> type; - } memory_layout; + TileMode tile_mode; u32 layer_stride; GPUVAddr Address() const { @@ -880,7 +887,18 @@ public: } } zeta; - INSERT_UNION_PADDING_WORDS(0x41); + struct { + union { + BitField<0, 16, u32> x; + BitField<16, 16, u32> width; + }; + union { + BitField<0, 16, u32> y; + BitField<16, 16, u32> height; + }; + } render_area; + + INSERT_UNION_PADDING_WORDS(0x3F); union { BitField<0, 4, u32> stencil; @@ -921,7 +939,7 @@ public: BitField<25, 3, u32> map_7; }; - u32 GetMap(std::size_t index) const { + u32 Map(std::size_t index) const { const std::array maps{map_0, map_1, map_2, map_3, map_4, map_5, map_6, map_7}; ASSERT(index < maps.size()); @@ -934,11 +952,13 @@ public: u32 zeta_width; u32 zeta_height; union { - BitField<0, 16, u32> zeta_layers; + BitField<0, 16, u32> zeta_depth; BitField<16, 1, u32> zeta_volume; }; - INSERT_UNION_PADDING_WORDS(0x26); + SamplerIndex sampler_index; + + INSERT_UNION_PADDING_WORDS(0x25); u32 depth_test_enable; @@ -964,6 +984,7 @@ public: float b; float a; } blend_color; + INSERT_UNION_PADDING_WORDS(0x4); struct { @@ -1001,7 +1022,12 @@ public: float line_width_smooth; float line_width_aliased; - INSERT_UNION_PADDING_WORDS(0x1F); + INSERT_UNION_PADDING_WORDS(0x1B); + + u32 invalidate_sampler_cache_no_wfi; + u32 invalidate_texture_header_cache_no_wfi; + + INSERT_UNION_PADDING_WORDS(0x2); u32 vb_element_base; u32 vb_base_instance; @@ -1045,13 +1071,13 @@ public: } condition; struct { - u32 tsc_address_high; - u32 tsc_address_low; - u32 tsc_limit; + u32 address_high; + u32 address_low; + u32 limit; - GPUVAddr TSCAddress() const { - return static_cast( - (static_cast(tsc_address_high) << 32) | tsc_address_low); + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); } } tsc; @@ -1062,13 +1088,13 @@ public: u32 line_smooth_enable; struct { - u32 tic_address_high; - u32 tic_address_low; - u32 tic_limit; + u32 address_high; + u32 address_low; + u32 limit; - GPUVAddr TICAddress() const { - return static_cast( - (static_cast(tic_address_high) << 32) | tic_address_low); + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); } } tic; @@ -1397,12 +1423,6 @@ public: void FlushMMEInlineDraw(); - /// Given a texture handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - - /// Returns the texture information for a specific texture in a specific shader stage. - Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; @@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); ASSERT_REG_POSITION(patch_vertices, 0x373); +ASSERT_REG_POSITION(fragment_barrier, 0x378); ASSERT_REG_POSITION(scissor_test, 0x380); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); +ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD); +ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF); ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(depth_bounds, 0x3E7); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); @@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); ASSERT_REG_POSITION(zeta, 0x3F8); +ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); @@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); -ASSERT_REG_POSITION(zeta_layers, 0x48c); +ASSERT_REG_POSITION(zeta_depth, 0x48c); +ASSERT_REG_POSITION(sampler_index, 0x48D); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(line_width_smooth, 0x4EC); ASSERT_REG_POSITION(line_width_aliased, 0x4ED); +ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509); +ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1c29e895e..ba750748c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() { } void MaxwellDMA::CopyBlockLinearToPitch() { + UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); UNIMPLEMENTED_IF(regs.src_params.layer != 0); @@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { } void MaxwellDMA::CopyPitchToBlockLinear() { + UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); + const auto& dst_params = regs.dst_params; const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; const u32 width = dst_params.width; -- cgit v1.2.3