diff options
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 9 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 27 |
9 files changed, 65 insertions, 52 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 0ee228e28..98a8b5337 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,8 +10,7 @@ namespace Tegra::Engines { -Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : rasterizer{rasterizer}, memory_manager{memory_manager} {} +Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method_call.method < Regs::NUM_REGS, diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 05421d185..0901cf2fa 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -33,7 +33,7 @@ namespace Tegra::Engines { class Fermi2D final { public: - explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); + explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); ~Fermi2D() = default; /// Write the value to the register identified by method. @@ -145,7 +145,6 @@ public: private: VideoCore::RasterizerInterface& rasterizer; - MemoryManager& memory_manager; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 44279de00..fa4a7c5c1 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -15,7 +15,7 @@ namespace Tegra::Engines { KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) - : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} + : system{system}, upload_state{memory_manager, regs.upload} {} KeplerMemory::~KeplerMemory() = default; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index f3bc675a9..e0e25c321 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -65,7 +65,6 @@ public: private: Core::System& system; - MemoryManager& memory_manager; Upload::State upload_state; }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 125c53360..f5158d219 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -249,16 +249,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { executing_macro = 0; // Lookup the macro offset - const u32 entry{(method - MacroRegistersStart) >> 1}; - const auto& search{macro_offsets.find(entry)}; - if (search == macro_offsets.end()) { - LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); - UNREACHABLE(); - return; - } + const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); // Execute the current macro. - macro_interpreter.Execute(search->second, std::move(parameters)); + macro_interpreter.Execute(macro_positions[entry], std::move(parameters)); } void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { @@ -421,7 +415,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) { } void Maxwell3D::ProcessMacroBind(u32 data) { - macro_offsets[regs.macros.entry] = data; + macro_positions[regs.macros.entry++] = data; } void Maxwell3D::ProcessQueryGet() { @@ -524,7 +518,7 @@ void Maxwell3D::ProcessQueryCondition() { void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); - const u32 cache_flush = regs.sync_info.unknown.Value(); + [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); if (increment) { system.GPU().IncrementSyncPoint(sync_point); } @@ -626,10 +620,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); - const auto r_type{tic_entry.r_type.Value()}; - const auto g_type{tic_entry.g_type.Value()}; - const auto b_type{tic_entry.b_type.Value()}; - const auto a_type{tic_entry.a_type.Value()}; + [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()}; + [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()}; + [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()}; + [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()}; // TODO(Subv): Different data types for separate components are not supported DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1ee982b76..0184342a0 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1270,7 +1270,7 @@ private: MemoryManager& memory_manager; /// Start offsets of each macro in macro_memory - std::unordered_map<u32, u32> macro_offsets; + std::array<u32, 0x80> macro_positions = {}; /// Memory for macro code MacroMemory macro_memory; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a28c04473..ad8453c5f 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -5,18 +5,17 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/memory_manager.h" -#include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" #include "video_core/textures/decoders.h" namespace Tegra::Engines { -MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} +MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) + : system{system}, memory_manager{memory_manager} {} void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method_call.method < Regs::NUM_REGS, @@ -84,13 +83,17 @@ void MaxwellDMA::HandleCopy() { ASSERT(regs.exec.enable_2d == 1); if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { - ASSERT(regs.src_params.size_z == 1); + ASSERT(regs.src_params.BlockDepth() == 0); // If the input is tiled and the output is linear, deswizzle the input and copy it over. - const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; + const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; const std::size_t src_size = Texture::CalculateSize( - true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, + true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); + const std::size_t src_layer_size = Texture::CalculateSize( + true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1, + regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); + const std::size_t dst_size = regs.dst_pitch * regs.y_count; if (read_buffer.size() < src_size) { @@ -104,23 +107,23 @@ void MaxwellDMA::HandleCopy() { memory_manager.ReadBlock(source, read_buffer.data(), src_size); memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, - regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), - write_buffer.data(), regs.src_params.BlockHeight(), - regs.src_params.pos_x, regs.src_params.pos_y); + Texture::UnswizzleSubrect( + regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, + read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(), + regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y); memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } else { ASSERT(regs.dst_params.BlockDepth() == 0); - const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; + const u32 bytes_per_pixel = regs.src_pitch / regs.x_count; const std::size_t dst_size = Texture::CalculateSize( - true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, + true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); const std::size_t dst_layer_size = Texture::CalculateSize( - true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, + true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); const std::size_t src_size = regs.src_pitch * regs.y_count; @@ -133,14 +136,19 @@ void MaxwellDMA::HandleCopy() { write_buffer.resize(dst_size); } - memory_manager.ReadBlock(source, read_buffer.data(), src_size); - memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); + if (Settings::values.use_accurate_gpu_emulation) { + memory_manager.ReadBlock(source, read_buffer.data(), src_size); + memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); + } else { + memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); + memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); + } // If the input is linear and the output is tiled, swizzle the input and copy it over. - Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, - src_bytes_per_pixel, - write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, - read_buffer.data(), regs.dst_params.BlockHeight()); + Texture::SwizzleSubrect( + regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel, + write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(), + regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y); memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 17b015ca7..93808a9bb 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -20,10 +20,6 @@ namespace Tegra { class MemoryManager; } -namespace VideoCore { -class RasterizerInterface; -} - namespace Tegra::Engines { /** @@ -33,8 +29,7 @@ namespace Tegra::Engines { class MaxwellDMA final { public: - explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); + explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); ~MaxwellDMA() = default; /// Write the value to the register identified by method. @@ -180,8 +175,6 @@ public: private: Core::System& system; - VideoCore::RasterizerInterface& rasterizer; - MemoryManager& memory_manager; std::vector<u8> read_buffer; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 083ee3304..c3678b9ea 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -538,6 +538,12 @@ enum class PhysicalAttributeDirection : u64 { Output = 1, }; +enum class VoteOperation : u64 { + All = 0, // allThreadsNV + Any = 1, // anyThreadNV + Eq = 2, // allThreadsEqualNV +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -565,6 +571,13 @@ union Instruction { } nop; union { + BitField<48, 2, VoteOperation> operation; + BitField<45, 3, u64> dest_pred; + BitField<39, 3, u64> value; + BitField<42, 1, u64> negate_value; + } vote; + + union { BitField<8, 8, Register> gpr; BitField<20, 24, s64> offset; } gmem; @@ -873,6 +886,7 @@ union Instruction { union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; + BitField<6, 1, u64> neg_b; BitField<7, 1, u64> abs_a; BitField<39, 3, u64> pred39; BitField<42, 1, u64> neg_pred; @@ -1006,7 +1020,6 @@ union Instruction { } iset; union { - BitField<41, 2, u64> selector; // i2i and i2f only BitField<45, 1, u64> negate_a; BitField<49, 1, u64> abs_a; BitField<10, 2, Register::Size> src_size; @@ -1023,8 +1036,6 @@ union Instruction { } f2i; union { - BitField<8, 2, Register::Size> src_size; - BitField<10, 2, Register::Size> dst_size; BitField<39, 4, u64> rounding; // H0, H1 extract for F16 missing BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value @@ -1034,6 +1045,13 @@ union Instruction { } } f2f; + union { + BitField<41, 2, u64> selector; + } int_src; + + union { + BitField<41, 1, u64> selector; + } float_src; } conversion; union { @@ -1489,6 +1507,7 @@ public: SYNC, BRK, DEPBAR, + VOTE, BFE_C, BFE_R, BFE_IMM, @@ -1651,6 +1670,7 @@ public: Hfma2, Flow, Synch, + Warp, Memory, Texture, Image, @@ -1777,6 +1797,7 @@ private: INST("111000110100---", Id::BRK, Type::Flow, "BRK"), INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), + INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), |
