diff options
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 8 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 10 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 7 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 61 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 24 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 41 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 9 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 |
10 files changed, 109 insertions, 75 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 03b7ee5d8..55966eef1 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -6,12 +6,13 @@ #include "common/logging/log.h" #include "common/math_util.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" namespace Tegra::Engines { Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), rasterizer{rasterizer} {} + : rasterizer{rasterizer}, memory_manager{memory_manager} {} void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method_call.method < Regs::NUM_REGS, diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 80523e320..2e51b7f13 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -10,7 +10,10 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" + +namespace Tegra { +class MemoryManager; +} namespace VideoCore { class RasterizerInterface; @@ -115,10 +118,9 @@ public: }; } regs{}; - MemoryManager& memory_manager; - private: VideoCore::RasterizerInterface& rasterizer; + MemoryManager& memory_manager; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 6575afd0f..fb6cdf432 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -9,7 +9,10 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" + +namespace Tegra { +class MemoryManager; +} namespace Tegra::Engines { @@ -40,10 +43,11 @@ public: static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "KeplerCompute Regs has wrong size"); - MemoryManager& memory_manager; - /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); + +private: + MemoryManager& memory_manager; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index aae2a4019..cd51a31d7 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -5,16 +5,17 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" namespace Tegra::Engines { KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} KeplerMemory::~KeplerMemory() = default; @@ -40,17 +41,13 @@ void KeplerMemory::ProcessData(u32 data) { ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); - const GPUVAddr address = regs.dest.Address(); - const auto dest_address = - memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32)); - ASSERT_MSG(dest_address, "Invalid GPU address"); - // We have to invalidate the destination region to evict any outdated surfaces from the cache. - // We do this before actually writing the new data because the destination address might contain - // a dirty surface that will have to be written back to memory. - Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); + // We do this before actually writing the new data because the destination address might + // contain a dirty surface that will have to be written back to memory. + const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; + rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); + memory_manager.Write<u32>(address, data); - Memory::Write32(*dest_address, data); system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); state.write_offset++; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 9181e9d80..78b6c3e45 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -10,12 +10,15 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" namespace Core { class System; } +namespace Tegra { +class MemoryManager; +} + namespace VideoCore { class RasterizerInterface; } @@ -82,8 +85,8 @@ public: private: Core::System& system; - MemoryManager& memory_manager; VideoCore::RasterizerInterface& rasterizer; + MemoryManager& memory_manager; void ProcessData(u32 data); }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 144e7fa82..74403eed4 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,11 +7,10 @@ #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/memory.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_base.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { @@ -21,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), system{system}, rasterizer{rasterizer}, - macro_interpreter(*this) { + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ + *this} { InitializeRegisterDefaults(); } @@ -250,6 +249,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessQueryGet(); break; } + case MAXWELL3D_REG_INDEX(sync_info): { + ProcessSyncPoint(); + break; + } default: break; } @@ -270,11 +273,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) { } void Maxwell3D::ProcessQueryGet() { - GPUVAddr sequence_address = regs.query.QueryAddress(); + const GPUVAddr sequence_address{regs.query.QueryAddress()}; // Since the sequence address is given as a GPU VAddr, we have to convert it to an application // VAddr before writing. - const auto address = memory_manager.GpuToCpuAddress(sequence_address); - ASSERT_MSG(address, "Invalid GPU address"); // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, @@ -309,7 +310,7 @@ void Maxwell3D::ProcessQueryGet() { // Write the current query sequence to the sequence address. // TODO(Subv): Find out what happens if you use a long query type but mark it as a short // query. - Memory::Write32(*address, sequence); + memory_manager.Write<u32>(sequence_address, sequence); } else { // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast // GPU, this command may actually take a while to complete in real hardware due to GPU @@ -318,7 +319,7 @@ void Maxwell3D::ProcessQueryGet() { query_result.value = result; // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming query_result.timestamp = system.CoreTiming().GetTicks(); - Memory::WriteBlock(*address, &query_result, sizeof(query_result)); + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); } dirty_flags.OnMemoryWrite(); break; @@ -329,6 +330,14 @@ void Maxwell3D::ProcessQueryGet() { } } +void Maxwell3D::ProcessSyncPoint() { + const u32 sync_point = regs.sync_info.sync_point.Value(); + const u32 increment = regs.sync_info.increment.Value(); + const u32 cache_flush = regs.sync_info.unknown.Value(); + LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, + cache_flush); +} + void Maxwell3D::DrawArrays() { LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count); @@ -393,10 +402,12 @@ void Maxwell3D::ProcessCBData(u32 value) { // Don't allow writing past the end of the buffer. ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); - const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); - ASSERT_MSG(address, "Invalid GPU address"); + const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; + + u8* ptr{memory_manager.GetPointer(address)}; + rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); + memory_manager.Write<u32>(address, value); - Memory::Write32(*address, value); dirty_flags.OnMemoryWrite(); // Increment the current buffer position. @@ -404,14 +415,10 @@ void Maxwell3D::ProcessCBData(u32 value) { } Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { - const GPUVAddr tic_base_address = regs.tic.TICAddress(); - - const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); - const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); - ASSERT_MSG(tic_address_cpu, "Invalid GPU address"); + const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; Texture::TICEntry tic_entry; - Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); + memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || tic_entry.header_version == Texture::TICHeaderVersion::Pitch, @@ -429,14 +436,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { } Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { - const GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); - - const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); - const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); - ASSERT_MSG(tsc_address_cpu, "Invalid GPU address"); + const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; Texture::TSCEntry tsc_entry; - Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); + memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } @@ -455,10 +458,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { - const auto address = memory_manager.GpuToCpuAddress(current_texture); - ASSERT_MSG(address, "Invalid GPU address"); - - const Texture::TextureHandle tex_handle{Memory::Read32(*address)}; + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)}; Texture::FullTextureInfo tex_info{}; // TODO(Subv): Use the shader to determine which textures are actually accessed. @@ -493,10 +493,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); - ASSERT_MSG(tex_address_cpu, "Invalid GPU address"); - - const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)}; + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; Texture::FullTextureInfo tex_info{}; tex_info.index = static_cast<u32>(offset); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7fbf1026e..321af3297 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -16,13 +16,16 @@ #include "common/math_util.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" -#include "video_core/memory_manager.h" #include "video_core/textures/texture.h" namespace Core { class System; } +namespace Tegra { +class MemoryManager; +} + namespace VideoCore { class RasterizerInterface; } @@ -576,7 +579,17 @@ public: u32 bind; } macros; - INSERT_PADDING_WORDS(0x188); + INSERT_PADDING_WORDS(0x69); + + struct { + union { + BitField<0, 16, u32> sync_point; + BitField<16, 1, u32> unknown; + BitField<20, 1, u32> increment; + }; + } sync_info; + + INSERT_PADDING_WORDS(0x11E); u32 tfb_enabled; @@ -1093,7 +1106,6 @@ public: }; State state{}; - MemoryManager& memory_manager; struct DirtyFlags { std::bitset<8> color_buffer{0xFF}; @@ -1141,6 +1153,8 @@ private: VideoCore::RasterizerInterface& rasterizer; + MemoryManager& memory_manager; + /// Start offsets of each macro in macro_memory std::unordered_map<u32, u32> macro_offsets; @@ -1180,6 +1194,9 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); + /// Handles writes to syncing register. + void ProcessSyncPoint(); + /// Handles a write to the CB_DATA[i] register. void ProcessCBData(u32 value); @@ -1195,6 +1212,7 @@ private: "Field " #field_name " has invalid position") ASSERT_REG_POSITION(macros, 0x45); +ASSERT_REG_POSITION(sync_info, 0xB2); ASSERT_REG_POSITION(tfb_enabled, 0x1D1); ASSERT_REG_POSITION(rt, 0x200); ASSERT_REG_POSITION(viewport_transform, 0x280); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 9dfea5999..2426d0067 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -5,17 +5,18 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" #include "video_core/textures/decoders.h" namespace Tegra::Engines { MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method_call.method < Regs::NUM_REGS, @@ -42,11 +43,6 @@ void MaxwellDMA::HandleCopy() { const GPUVAddr source = regs.src_address.Address(); const GPUVAddr dest = regs.dst_address.Address(); - const auto source_cpu = memory_manager.GpuToCpuAddress(source); - const auto dest_cpu = memory_manager.GpuToCpuAddress(dest); - ASSERT_MSG(source_cpu, "Invalid source GPU address"); - ASSERT_MSG(dest_cpu, "Invalid destination GPU address"); - // TODO(Subv): Perform more research and implement all features of this engine. ASSERT(regs.exec.enable_swizzle == 0); ASSERT(regs.exec.query_mode == Regs::QueryMode::None); @@ -69,7 +65,7 @@ void MaxwellDMA::HandleCopy() { // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, // y_count). if (!regs.exec.enable_2d) { - Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count); + memory_manager.CopyBlock(dest, source, regs.x_count); return; } @@ -78,9 +74,9 @@ void MaxwellDMA::HandleCopy() { // rectangle. There is no need to manually flush/invalidate the regions because // CopyBlock does that for us. for (u32 line = 0; line < regs.y_count; ++line) { - const VAddr source_line = *source_cpu + line * regs.src_pitch; - const VAddr dest_line = *dest_cpu + line * regs.dst_pitch; - Memory::CopyBlock(dest_line, source_line, regs.x_count); + const GPUVAddr source_line = source + line * regs.src_pitch; + const GPUVAddr dest_line = dest + line * regs.dst_pitch; + memory_manager.CopyBlock(dest_line, source_line, regs.x_count); } return; } @@ -89,15 +85,28 @@ void MaxwellDMA::HandleCopy() { const std::size_t copy_size = regs.x_count * regs.y_count; + auto source_ptr{memory_manager.GetPointer(source)}; + auto dst_ptr{memory_manager.GetPointer(dest)}; + + if (!source_ptr) { + LOG_ERROR(HW_GPU, "source_ptr is invalid"); + return; + } + + if (!dst_ptr) { + LOG_ERROR(HW_GPU, "dst_ptr is invalid"); + return; + } + const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated // copying. - Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); + rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); // We have to invalidate the destination region to evict any outdated surfaces from the // cache. We do this before actually writing the new data because the destination address // might contain a dirty surface that will have to be written back to memory. - Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); + rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); }; if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { @@ -110,8 +119,8 @@ void MaxwellDMA::HandleCopy() { copy_size * src_bytes_per_pixel); Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, - regs.src_params.size_x, src_bytes_per_pixel, *source_cpu, - *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x, + regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, + regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y); } else { ASSERT(regs.dst_params.size_z == 1); @@ -124,7 +133,7 @@ void MaxwellDMA::HandleCopy() { // If the input is linear and the output is tiled, swizzle the input and copy it over. Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, - src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight()); + src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); } } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 34c369320..c6b649842 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -10,12 +10,15 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" namespace Core { class System; } +namespace Tegra { +class MemoryManager; +} + namespace VideoCore { class RasterizerInterface; } @@ -139,13 +142,13 @@ public: }; } regs{}; - MemoryManager& memory_manager; - private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; + MemoryManager& memory_manager; + /// Performs the copy from the source buffer to the destination buffer as configured in the /// registers. void HandleCopy(); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7f613370b..363e53be1 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1662,7 +1662,7 @@ private: INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), + INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), |
