From 2c27127d04a155fe0f893e84263d58f14473785d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 28 Dec 2020 01:02:06 -0500 Subject: nvdec syncpt incorporation laying the groundwork for async gpu, although this does not fully implement async nvdec operations --- .../hle/service/nvdrv/devices/nvhost_nvdec.cpp | 5 +++-- src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 3 ++- .../service/nvdrv/devices/nvhost_nvdec_common.cpp | 26 ++++++++++++++++------ .../service/nvdrv/devices/nvhost_nvdec_common.h | 14 ++++++++---- src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 5 +++-- src/core/hle/service/nvdrv/devices/nvhost_vic.h | 4 ++-- src/core/hle/service/nvdrv/nvdrv.cpp | 6 +++-- 7 files changed, 43 insertions(+), 20 deletions(-) (limited to 'src/core') diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index d8735491c..36970f828 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -11,8 +11,9 @@ namespace Service::Nvidia::Devices { -nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev) - : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} +nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {} nvhost_nvdec::~nvhost_nvdec() = default; NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector& input, diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 79b8b6de1..77ef53cdd 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices { class nvhost_nvdec final : public nvhost_nvdec_common { public: - explicit nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev); + explicit nvhost_nvdec(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_nvdec() override; NvResult Ioctl1(Ioctl command, const std::vector& input, std::vector& output) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index b49cecb42..64370ad4c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -11,6 +11,7 @@ #include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" #include "core/hle/service/nvdrv/devices/nvmap.h" +#include "core/hle/service/nvdrv/syncpoint_manager.h" #include "core/memory.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" @@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector& dst, const std::vector& src, std::s } } // Anonymous namespace -nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev) - : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {} nvhost_nvdec_common::~nvhost_nvdec_common() = default; NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector& input) { @@ -71,10 +73,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vectorGetObject(cmd_buffer.memory_id); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); @@ -89,6 +95,10 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector& input, std::vector& input, std::ve std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); - // We found that implementing this causes deadlocks with async gpu, along with degraded - // performance. TODO: RE the nvdec async implementation - params.value = 0; + if (device_syncpoints[params.param] == 0) { + device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); + } + params.value = device_syncpoints[params.param]; std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); return NvResult::Success; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index d9f95ba58..4c9d4ba41 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -10,12 +10,16 @@ #include "common/swap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" -namespace Service::Nvidia::Devices { +namespace Service::Nvidia { +class SyncpointManager; + +namespace Devices { class nvmap; class nvhost_nvdec_common : public nvdevice { public: - explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev); + explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_nvdec_common() override; protected: @@ -157,8 +161,10 @@ protected: s32_le nvmap_fd{}; u32_le submit_timeout{}; std::shared_ptr nvmap_dev; - + SyncpointManager& syncpoint_manager; + std::array device_syncpoints{}; // This is expected to be ordered, therefore we must use a map, not unordered_map std::map buffer_mappings; }; -}; // namespace Service::Nvidia::Devices +}; // namespace Devices +} // namespace Service::Nvidia diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 805fe86ae..72499654c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -10,8 +10,9 @@ #include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { -nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev) - : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} +nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager) + : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {} nvhost_vic::~nvhost_vic() = default; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index b2e11f4d4..f401c61fa 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -7,11 +7,11 @@ #include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { -class nvmap; class nvhost_vic final : public nvhost_nvdec_common { public: - explicit nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev); + explicit nvhost_vic(Core::System& system, std::shared_ptr nvmap_dev, + SyncpointManager& syncpoint_manager); ~nvhost_vic(); NvResult Ioctl1(Ioctl command, const std::vector& input, std::vector& output) override; diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index e03195afe..620c18728 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { devices["/dev/nvdisp_disp0"] = std::make_shared(system, nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared(system, events_interface, syncpoint_manager); - devices["/dev/nvhost-nvdec"] = std::make_shared(system, nvmap_dev); + devices["/dev/nvhost-nvdec"] = + std::make_shared(system, nvmap_dev, syncpoint_manager); devices["/dev/nvhost-nvjpg"] = std::make_shared(system); - devices["/dev/nvhost-vic"] = std::make_shared(system, nvmap_dev); + devices["/dev/nvhost-vic"] = + std::make_shared(system, nvmap_dev, syncpoint_manager); } Module::~Module() = default; -- cgit v1.2.3 From 06cef3355e415be83db3bc6d19b022de0b977580 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 28 Dec 2020 01:21:41 -0500 Subject: fix for nvdec disabled, cleanup host1x --- .../service/nvdrv/devices/nvhost_nvdec_common.cpp | 25 ++++++----- src/video_core/command_classes/host1x.cpp | 21 +++------- src/video_core/command_classes/host1x.h | 49 ++-------------------- 3 files changed, 23 insertions(+), 72 deletions(-) (limited to 'src/core') diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 64370ad4c..4898dc27a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -73,14 +73,15 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vectorGetObject(cmd_buffer.memory_id); ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); @@ -95,11 +96,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector& input, std::ve std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); - if (device_syncpoints[params.param] == 0) { + if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) { device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); } params.value = device_syncpoints[params.param]; diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp index 9d0a1b4d9..b12494528 100644 --- a/src/video_core/command_classes/host1x.cpp +++ b/src/video_core/command_classes/host1x.cpp @@ -10,22 +10,14 @@ Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {} Tegra::Host1x::~Host1x() = default; -void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { - u8* const state_offset = reinterpret_cast(&state) + offset * sizeof(u32); - std::memcpy(state_offset, &arguments, sizeof(u32)); -} - -void Tegra::Host1x::ProcessMethod(Method method, const std::vector& arguments) { - StateWrite(static_cast(method), arguments[0]); +void Tegra::Host1x::ProcessMethod(Method method, u32 argument) { switch (method) { - case Method::WaitSyncpt: - Execute(arguments[0]); - break; case Method::LoadSyncptPayload32: - syncpoint_value = arguments[0]; + syncpoint_value = argument; break; + case Method::WaitSyncpt: case Method::WaitSyncpt32: - Execute(arguments[0]); + Execute(argument); break; default: UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast(method)); @@ -34,8 +26,5 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector& argumen } void Tegra::Host1x::Execute(u32 data) { - u32 syncpointId = (data & 0xFF); - u32 threshold = state.load_syncpoint_payload32; - - gpu.WaitFence(syncpointId, threshold); + gpu.WaitFence(data, syncpoint_value); } diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h index 013eaa0c1..7e94799dd 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/command_classes/host1x.h @@ -14,64 +14,23 @@ class Nvdec; class Host1x { public: - struct Host1xClassRegisters { - u32 incr_syncpt{}; - u32 incr_syncpt_ctrl{}; - u32 incr_syncpt_error{}; - INSERT_PADDING_WORDS(5); - u32 wait_syncpt{}; - u32 wait_syncpt_base{}; - u32 wait_syncpt_incr{}; - u32 load_syncpt_base{}; - u32 incr_syncpt_base{}; - u32 clear{}; - u32 wait{}; - u32 wait_with_interrupt{}; - u32 delay_use{}; - u32 tick_count_high{}; - u32 tick_count_low{}; - u32 tick_ctrl{}; - INSERT_PADDING_WORDS(23); - u32 ind_ctrl{}; - u32 ind_off2{}; - u32 ind_off{}; - std::array ind_data{}; - INSERT_PADDING_WORDS(1); - u32 load_syncpoint_payload32{}; - u32 stall_ctrl{}; - u32 wait_syncpt32{}; - u32 wait_syncpt_base32{}; - u32 load_syncpt_base32{}; - u32 incr_syncpt_base32{}; - u32 stall_count_high{}; - u32 stall_count_low{}; - u32 xref_ctrl{}; - u32 channel_xref_high{}; - u32 channel_xref_low{}; - }; - static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size"); - enum class Method : u32 { - WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, - LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, - WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4, + WaitSyncpt = 0x8, + LoadSyncptPayload32 = 0x4e, + WaitSyncpt32 = 0x50, }; explicit Host1x(GPU& gpu); ~Host1x(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, const std::vector& arguments); + void ProcessMethod(Method method, u32 argument); private: /// For Host1x, execute is waiting on a syncpoint previously written into the state void Execute(u32 data); - /// Write argument into the provided offset - void StateWrite(u32 offset, u32 arguments); - u32 syncpoint_value{}; - Host1xClassRegisters state{}; GPU& gpu; }; -- cgit v1.2.3