diff options
Diffstat (limited to 'src')
89 files changed, 4883 insertions, 1015 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 0fb5d9708..e50ab2922 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -150,6 +150,8 @@ add_library(common STATIC scope_exit.h spin_lock.cpp spin_lock.h + stream.cpp + stream.h string_util.cpp string_util.h swap.h diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index 1c1d09ccb..e186ed880 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -91,7 +91,7 @@ void Fiber::Rewind() { SwitchToFiber(impl->rewind_handle); } -void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) { +void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) { ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); ASSERT_MSG(to != nullptr, "Next fiber is null!"); to->guard.lock(); @@ -199,7 +199,7 @@ void Fiber::Rewind() { boost::context::detail::jump_fcontext(impl->rewind_context, this); } -void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) { +void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) { ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); ASSERT_MSG(to != nullptr, "Next fiber is null!"); to->guard.lock(); diff --git a/src/common/fiber.h b/src/common/fiber.h index 89dde5e36..cefd61df9 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -46,7 +46,7 @@ public: /// Yields control from Fiber 'from' to Fiber 'to' /// Fiber 'from' must be the currently running fiber. - static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to); + static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to); [[nodiscard]] static std::shared_ptr<Fiber> ThreadToFiber(); void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter); diff --git a/src/common/stream.cpp b/src/common/stream.cpp new file mode 100644 index 000000000..bf0496c26 --- /dev/null +++ b/src/common/stream.cpp @@ -0,0 +1,47 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <stdexcept> +#include "common/common_types.h" +#include "common/stream.h" + +namespace Common { + +Stream::Stream() = default; +Stream::~Stream() = default; + +void Stream::Seek(s32 offset, SeekOrigin origin) { + if (origin == SeekOrigin::SetOrigin) { + if (offset < 0) { + position = 0; + } else if (position >= buffer.size()) { + position = buffer.size(); + } else { + position = offset; + } + } else if (origin == SeekOrigin::FromCurrentPos) { + Seek(static_cast<s32>(position) + offset, SeekOrigin::SetOrigin); + } else if (origin == SeekOrigin::FromEnd) { + Seek(static_cast<s32>(buffer.size()) - offset, SeekOrigin::SetOrigin); + } +} + +u8 Stream::ReadByte() { + if (position < buffer.size()) { + return buffer[position++]; + } else { + throw std::out_of_range("Attempting to read a byte not within the buffer range"); + } +} + +void Stream::WriteByte(u8 byte) { + if (position == buffer.size()) { + buffer.push_back(byte); + position++; + } else { + buffer.insert(buffer.begin() + position, byte); + } +} + +} // namespace Common diff --git a/src/common/stream.h b/src/common/stream.h new file mode 100644 index 000000000..2585c16af --- /dev/null +++ b/src/common/stream.h @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_types.h" + +namespace Common { + +enum class SeekOrigin { + SetOrigin, + FromCurrentPos, + FromEnd, +}; + +class Stream { +public: + /// Stream creates a bitstream and provides common functionality on the stream. + explicit Stream(); + ~Stream(); + + /// Reposition bitstream "cursor" to the specified offset from origin + void Seek(s32 offset, SeekOrigin origin); + + /// Reads next byte in the stream buffer and increments position + u8 ReadByte(); + + /// Writes byte at current position + void WriteByte(u8 byte); + + std::size_t GetPosition() const { + return position; + } + + std::vector<u8>& GetBuffer() { + return buffer; + } + + const std::vector<u8>& GetBuffer() const { + return buffer; + } + +private: + std::vector<u8> buffer; + std::size_t position{0}; +}; + +} // namespace Common diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index db1c9fdef..e0f207f3e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -439,6 +439,8 @@ add_library(core STATIC hle/service/nvdrv/devices/nvhost_gpu.h hle/service/nvdrv/devices/nvhost_nvdec.cpp hle/service/nvdrv/devices/nvhost_nvdec.h + hle/service/nvdrv/devices/nvhost_nvdec_common.cpp + hle/service/nvdrv/devices/nvhost_nvdec_common.h hle/service/nvdrv/devices/nvhost_nvjpg.cpp hle/service/nvdrv/devices/nvhost_nvjpg.h hle/service/nvdrv/devices/nvhost_vic.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index 81e8cc338..fde2ccc09 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -40,6 +40,7 @@ #include "core/hle/service/lm/manager.h" #include "core/hle/service/service.h" #include "core/hle/service/sm/sm.h" +#include "core/hle/service/time/time_manager.h" #include "core/loader/loader.h" #include "core/memory.h" #include "core/memory/cheat_engine.h" @@ -121,7 +122,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, struct System::Impl { explicit Impl(System& system) : kernel{system}, fs_controller{system}, memory{system}, - cpu_manager{system}, reporter{system}, applet_manager{system} {} + cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} ResultStatus Run() { status = ResultStatus::Success; @@ -189,6 +190,9 @@ struct System::Impl { return ResultStatus::ErrorVideoCore; } + // Initialize time manager, which must happen after kernel is created + time_manager.Initialize(); + is_powered_on = true; exit_lock = false; @@ -387,6 +391,7 @@ struct System::Impl { /// Service State Service::Glue::ARPManager arp_manager; Service::LM::Manager lm_manager{reporter}; + Service::Time::TimeManager time_manager; /// Service manager std::shared_ptr<Service::SM::ServiceManager> service_manager; @@ -717,6 +722,14 @@ const Service::LM::Manager& System::GetLogManager() const { return impl->lm_manager; } +Service::Time::TimeManager& System::GetTimeManager() { + return impl->time_manager; +} + +const Service::Time::TimeManager& System::GetTimeManager() const { + return impl->time_manager; +} + void System::SetExitLock(bool locked) { impl->exit_lock = locked; } diff --git a/src/core/core.h b/src/core/core.h index 27efe30bb..6db896bae 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -69,6 +69,10 @@ namespace SM { class ServiceManager; } // namespace SM +namespace Time { +class TimeManager; +} // namespace Time + } // namespace Service namespace Tegra { @@ -361,6 +365,10 @@ public: const Service::LM::Manager& GetLogManager() const; + Service::Time::TimeManager& GetTimeManager(); + + const Service::Time::TimeManager& GetTimeManager() const; + void SetExitLock(bool locked); bool GetExitLock() const; diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 688b99eba..983210197 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -365,6 +365,8 @@ void CpuManager::RunThread(std::size_t core) { data.enter_barrier.reset(); data.exit_barrier.reset(); data.initialized = false; + + MicroProfileOnThreadExit(); } } // namespace Core diff --git a/src/core/frontend/applets/controller.cpp b/src/core/frontend/applets/controller.cpp index c5d65f2d0..5582091f4 100644 --- a/src/core/frontend/applets/controller.cpp +++ b/src/core/frontend/applets/controller.cpp @@ -19,7 +19,7 @@ DefaultControllerApplet::DefaultControllerApplet(Service::SM::ServiceManager& se DefaultControllerApplet::~DefaultControllerApplet() = default; void DefaultControllerApplet::ReconfigureControllers(std::function<void()> callback, - ControllerParameters parameters) const { + const ControllerParameters& parameters) const { LOG_INFO(Service_HID, "called, deducing the best configuration based on the given parameters!"); auto& npad = diff --git a/src/core/frontend/applets/controller.h b/src/core/frontend/applets/controller.h index 3e49cdbb9..dff71d8d9 100644 --- a/src/core/frontend/applets/controller.h +++ b/src/core/frontend/applets/controller.h @@ -38,7 +38,7 @@ public: virtual ~ControllerApplet(); virtual void ReconfigureControllers(std::function<void()> callback, - ControllerParameters parameters) const = 0; + const ControllerParameters& parameters) const = 0; }; class DefaultControllerApplet final : public ControllerApplet { @@ -47,7 +47,7 @@ public: ~DefaultControllerApplet() override; void ReconfigureControllers(std::function<void()> callback, - ControllerParameters parameters) const override; + const ControllerParameters& parameters) const override; private: Service::SM::ServiceManager& service_manager; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index b2b5b8adf..bb3e312a7 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -86,8 +86,6 @@ struct KernelCore::Impl { } cores.clear(); - registered_core_threads.reset(); - process_list.clear(); current_process = nullptr; @@ -199,9 +197,7 @@ struct KernelCore::Impl { const auto it = std::find(register_host_thread_keys.begin(), end, this_id); ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); ASSERT(it == end); - ASSERT(!registered_core_threads[core_id]); InsertHostThread(static_cast<u32>(core_id)); - registered_core_threads.set(core_id); } void RegisterHostThread() { @@ -332,7 +328,6 @@ struct KernelCore::Impl { // 0-3 IDs represent core threads, >3 represent others std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES}; - std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads; // Number of host threads is a relatively high number to avoid overflowing static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64; diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index ff9d9248b..b17529dee 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <bitset> +#include <ctime> #include <memory> #include <random> #include "common/alignment.h" @@ -123,7 +124,7 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name, : kernel.CreateNewUserProcessID(); process->capabilities.InitializeForMetadatalessProcess(); - std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(0)); + std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(std::time(nullptr))); std::uniform_int_distribution<u64> distribution; std::generate(process->random_entropy.begin(), process->random_entropy.end(), [&] { return distribution(rng); }); diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index d7a81f64a..2ce742e35 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -1201,6 +1201,8 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_) {151, nullptr, "TryPopFromNotificationStorageChannel"}, {160, nullptr, "GetHealthWarningDisappearedSystemEvent"}, {170, nullptr, "SetHdcpAuthenticationActivated"}, + {180, nullptr, "GetLaunchRequiredVersion"}, + {181, nullptr, "UpgradeLaunchRequiredVersion"}, {500, nullptr, "StartContinuousRecordingFlushForDebug"}, {1000, nullptr, "CreateMovieMaker"}, {1001, nullptr, "PrepareForJit"}, diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 8918946a1..50f709b25 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -260,7 +260,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) { {404, nullptr, "HasLeftRightBattery"}, {405, nullptr, "GetNpadInterfaceType"}, {406, nullptr, "GetNpadLeftRightInterfaceType"}, - {407, nullptr, "GetNpadOfHighestBatteryLevelForJoyLeft"}, + {407, nullptr, "GetNpadOfHighestBatteryLevel"}, {408, nullptr, "GetNpadOfHighestBatteryLevelForJoyRight"}, {500, nullptr, "GetPalmaConnectionHandle"}, {501, nullptr, "InitializePalma"}, diff --git a/src/core/hle/service/mii/mii.cpp b/src/core/hle/service/mii/mii.cpp index b81bf6277..d7080b715 100644 --- a/src/core/hle/service/mii/mii.cpp +++ b/src/core/hle/service/mii/mii.cpp @@ -47,6 +47,7 @@ public: {23, nullptr, "Convert"}, {24, nullptr, "ConvertCoreDataToCharInfo"}, {25, nullptr, "ConvertCharInfoToCoreData"}, + {26, nullptr, "Append"}, }; // clang-format on diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index fcb612864..b6df48360 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -2,15 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cstring> - #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { -nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {} +nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) + : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} nvhost_nvdec::~nvhost_nvdec() = default; u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -21,7 +23,7 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std:: switch (static_cast<IoctlCommand>(command.raw)) { case IoctlCommand::IocSetNVMAPfdCommand: - return SetNVMAPfd(input, output); + return SetNVMAPfd(input); case IoctlCommand::IocSubmit: return Submit(input, output); case IoctlCommand::IocGetSyncpoint: @@ -29,79 +31,29 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std:: case IoctlCommand::IocGetWaitbase: return GetWaitbase(input, output); case IoctlCommand::IocMapBuffer: - return MapBuffer(input, output); + case IoctlCommand::IocMapBuffer2: + case IoctlCommand::IocMapBuffer3: case IoctlCommand::IocMapBufferEx: - return MapBufferEx(input, output); - case IoctlCommand::IocUnmapBufferEx: - return UnmapBufferEx(input, output); + return MapBuffer(input, output); + case IoctlCommand::IocUnmapBufferEx: { + // This command is sent when the video stream has ended, flush all video contexts + // This is usually sent in the folowing order: vic, nvdec, vic. + // Inform the GPU to clear any remaining nvdec buffers when this is detected. + LOG_INFO(Service_NVDRV, "NVDEC video stream ended"); + Tegra::ChCommandHeaderList cmdlist(1); + cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F}; + system.GPU().PushCommandBuffer(cmdlist); + [[fallthrough]]; // fallthrough to unmap buffers + }; + case IoctlCommand::IocUnmapBuffer: + case IoctlCommand::IocUnmapBuffer2: + case IoctlCommand::IocUnmapBuffer3: + return UnmapBuffer(input, output); + case IoctlCommand::IocSetSubmitTimeout: + return SetSubmitTimeout(input, output); } - UNIMPLEMENTED_MSG("Unimplemented ioctl"); - return 0; -} - -u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlSetNvmapFD params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSetNvmapFD)); - LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); - - nvmap_fd = params.nvmap_fd; - return 0; -} - -u32 nvhost_nvdec::Submit(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlSubmit params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSubmit)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); - return 0; -} - -u32 nvhost_nvdec::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlGetSyncpoint params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); - return 0; -} - -u32 nvhost_nvdec::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlGetWaitbase params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlGetWaitbase)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), ¶ms, sizeof(IoctlGetWaitbase)); - return 0; -} - -u32 nvhost_nvdec::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlMapBuffer params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer)); - return 0; -} - -u32 nvhost_nvdec::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlMapBufferEx params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBufferEx)); - return 0; -} - -u32 nvhost_nvdec::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlUnmapBufferEx params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlUnmapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), ¶ms, sizeof(IoctlUnmapBufferEx)); + UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw); return 0; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 4332db118..102777ddd 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -4,16 +4,14 @@ #pragma once -#include <vector> -#include "common/common_types.h" -#include "common/swap.h" -#include "core/hle/service/nvdrv/devices/nvdevice.h" +#include <memory> +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { -class nvhost_nvdec final : public nvdevice { +class nvhost_nvdec final : public nvhost_nvdec_common { public: - explicit nvhost_nvdec(Core::System& system); + explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); ~nvhost_nvdec() override; u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -27,62 +25,15 @@ private: IocGetSyncpoint = 0xC0080002, IocGetWaitbase = 0xC0080003, IocMapBuffer = 0xC01C0009, + IocMapBuffer2 = 0xC16C0009, + IocMapBuffer3 = 0xC15C0009, IocMapBufferEx = 0xC0A40009, - IocUnmapBufferEx = 0xC0A4000A, + IocUnmapBuffer = 0xC0A4000A, + IocUnmapBuffer2 = 0xC16C000A, + IocUnmapBufferEx = 0xC01C000A, + IocUnmapBuffer3 = 0xC15C000A, + IocSetSubmitTimeout = 0x40040007, }; - - struct IoctlSetNvmapFD { - u32_le nvmap_fd; - }; - static_assert(sizeof(IoctlSetNvmapFD) == 0x4, "IoctlSetNvmapFD is incorrect size"); - - struct IoctlSubmit { - INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit has incorrect size"); - - struct IoctlGetSyncpoint { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetSyncpoint) == 0x08, "IoctlGetSyncpoint has incorrect size"); - - struct IoctlGetWaitbase { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetWaitbase) == 0x08, "IoctlGetWaitbase has incorrect size"); - - struct IoctlMapBuffer { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size"); - - struct IoctlMapBufferEx { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x98); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBufferEx) == 0xA4, "IoctlMapBufferEx has incorrect size"); - - struct IoctlUnmapBufferEx { - INSERT_PADDING_BYTES(0xA4); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlUnmapBufferEx) == 0xA4, "IoctlUnmapBufferEx has incorrect size"); - - u32_le nvmap_fd{}; - - u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); - u32 Submit(const std::vector<u8>& input, std::vector<u8>& output); - u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output); - u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); - u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output); - u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); - u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp new file mode 100644 index 000000000..85792495f --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -0,0 +1,234 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <cstring> + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" +#include "core/hle/service/nvdrv/devices/nvmap.h" +#include "core/memory.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" + +namespace Service::Nvidia::Devices { + +namespace { +// Splice vectors will copy count amount of type T from the input vector into the dst vector. +template <typename T> +std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count, + std::size_t offset) { + std::memcpy(dst.data(), input.data() + offset, count * sizeof(T)); + offset += count * sizeof(T); + return offset; +} + +// Write vectors will write data to the output buffer +template <typename T> +std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { + std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T)); + offset += src.size() * sizeof(T); + return offset; +} +} // Anonymous namespace + +namespace NvErrCodes { +constexpr u32 Success{}; +constexpr u32 OutOfMemory{static_cast<u32>(-12)}; +constexpr u32 InvalidInput{static_cast<u32>(-22)}; +} // namespace NvErrCodes + +nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) + : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_nvdec_common::~nvhost_nvdec_common() = default; + +u32 nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { + IoctlSetNvmapFD params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlSetNvmapFD)); + LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); + + nvmap_fd = params.nvmap_fd; + return 0; +} + +u32 nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSubmit params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlSubmit)); + LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); + + // Instantiate param buffers + std::size_t offset = sizeof(IoctlSubmit); + std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count); + std::vector<Reloc> relocs(params.relocation_count); + std::vector<u32> reloc_shifts(params.relocation_count); + std::vector<SyncptIncr> syncpt_increments(params.syncpoint_count); + std::vector<SyncptIncr> wait_checks(params.syncpoint_count); + std::vector<Fence> fences(params.fence_count); + + // Splice input into their respective buffers + offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset); + offset = SpliceVectors(input, relocs, params.relocation_count, offset); + offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset); + offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset); + offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); + offset = SpliceVectors(input, fences, params.fence_count, offset); + + // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment + + auto& gpu = system.GPU(); + + for (const auto& cmd_buffer : command_buffers) { + auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); + ASSERT_OR_EXECUTE(object, return NvErrCodes::InvalidInput;); + const auto map = FindBufferMap(object->dma_map_addr); + if (!map) { + LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}", + object->addr, object->dma_map_addr); + return 0; + } + Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); + gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(), + cmdlist.size() * sizeof(u32)); + gpu.PushCommandBuffer(cmdlist); + } + + std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); + // Some games expect command_buffers to be written back + offset = sizeof(IoctlSubmit); + offset = WriteVectors(output, command_buffers, offset); + offset = WriteVectors(output, relocs, offset); + offset = WriteVectors(output, reloc_shifts, offset); + offset = WriteVectors(output, syncpt_increments, offset); + offset = WriteVectors(output, wait_checks, offset); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlGetSyncpoint params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); + LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); + + // We found that implementing this causes deadlocks with async gpu, along with degraded + // performance. TODO: RE the nvdec async implementation + params.value = 0; + std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlGetWaitbase params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlGetWaitbase)); + params.value = 0; // Seems to be hard coded at 0 + std::memcpy(output.data(), ¶ms, sizeof(IoctlGetWaitbase)); + return 0; +} + +u32 nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlMapBuffer params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); + std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); + + SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); + + auto& gpu = system.GPU(); + + for (auto& cmf_buff : cmd_buffer_handles) { + auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; + if (!object) { + LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); + std::memcpy(output.data(), ¶ms, output.size()); + return NvErrCodes::InvalidInput; + } + if (object->dma_map_addr == 0) { + // NVDEC and VIC memory is in the 32-bit address space + // MapAllocate32 will attempt to map a lower 32-bit value in the shared gpu memory space + const GPUVAddr low_addr = gpu.MemoryManager().MapAllocate32(object->addr, object->size); + object->dma_map_addr = static_cast<u32>(low_addr); + // Ensure that the dma_map_addr is indeed in the lower 32-bit address space. + ASSERT(object->dma_map_addr == low_addr); + } + if (!object->dma_map_addr) { + LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size); + } else { + cmf_buff.map_address = object->dma_map_addr; + AddBufferMap(object->dma_map_addr, object->size, object->addr, + object->status == nvmap::Object::Status::Allocated); + } + } + std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer)); + std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(), + cmd_buffer_handles.size() * sizeof(MapBufferEntry)); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlMapBuffer params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); + std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); + SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); + + auto& gpu = system.GPU(); + + for (auto& cmf_buff : cmd_buffer_handles) { + const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; + if (!object) { + LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); + std::memcpy(output.data(), ¶ms, output.size()); + return NvErrCodes::InvalidInput; + } + if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) { + gpu.MemoryManager().Unmap(object->dma_map_addr, *size); + } else { + // This occurs quite frequently, however does not seem to impact functionality + LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr, + object->dma_map_addr); + } + object->dma_map_addr = 0; + } + std::memset(output.data(), 0, output.size()); + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output) { + std::memcpy(&submit_timeout, input.data(), input.size()); + LOG_WARNING(Service_NVDRV, "(STUBBED) called"); + return NvErrCodes::Success; +} + +std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap( + GPUVAddr gpu_addr) const { + const auto it = std::find_if( + buffer_mappings.begin(), buffer_mappings.upper_bound(gpu_addr), [&](const auto& entry) { + return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr()); + }); + + ASSERT(it != buffer_mappings.end()); + return it->second; +} + +void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, + bool is_allocated) { + buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated}); +} + +std::optional<std::size_t> nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) { + const auto iter{buffer_mappings.find(gpu_addr)}; + if (iter == buffer_mappings.end()) { + return std::nullopt; + } + std::size_t size = 0; + if (iter->second.IsAllocated()) { + size = iter->second.Size(); + } + buffer_mappings.erase(iter); + return size; +} + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h new file mode 100644 index 000000000..c249c5349 --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -0,0 +1,168 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <map> +#include <vector> +#include "common/common_types.h" +#include "common/swap.h" +#include "core/hle/service/nvdrv/devices/nvdevice.h" + +namespace Service::Nvidia::Devices { +class nvmap; + +class nvhost_nvdec_common : public nvdevice { +public: + explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + ~nvhost_nvdec_common() override; + + virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, + std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl, + IoctlVersion version) = 0; + +protected: + class BufferMap final { + public: + constexpr BufferMap() = default; + + constexpr BufferMap(GPUVAddr start_addr, std::size_t size) + : start_addr{start_addr}, end_addr{start_addr + size} {} + + constexpr BufferMap(GPUVAddr start_addr, std::size_t size, VAddr cpu_addr, + bool is_allocated) + : start_addr{start_addr}, end_addr{start_addr + size}, cpu_addr{cpu_addr}, + is_allocated{is_allocated} {} + + constexpr VAddr StartAddr() const { + return start_addr; + } + + constexpr VAddr EndAddr() const { + return end_addr; + } + + constexpr std::size_t Size() const { + return end_addr - start_addr; + } + + constexpr VAddr CpuAddr() const { + return cpu_addr; + } + + constexpr bool IsAllocated() const { + return is_allocated; + } + + private: + GPUVAddr start_addr{}; + GPUVAddr end_addr{}; + VAddr cpu_addr{}; + bool is_allocated{}; + }; + + struct IoctlSetNvmapFD { + u32_le nvmap_fd; + }; + static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); + + struct IoctlSubmitCommandBuffer { + u32_le id; + u32_le offset; + u32_le count; + }; + static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC, + "IoctlSubmitCommandBuffer is incorrect size"); + struct IoctlSubmit { + u32_le cmd_buffer_count; + u32_le relocation_count; + u32_le syncpoint_count; + u32_le fence_count; + }; + static_assert(sizeof(IoctlSubmit) == 0x10, "IoctlSubmit has incorrect size"); + + struct CommandBuffer { + s32 memory_id; + u32 offset; + s32 word_count; + }; + static_assert(sizeof(CommandBuffer) == 0xC, "CommandBuffer has incorrect size"); + + struct Reloc { + s32 cmdbuffer_memory; + s32 cmdbuffer_offset; + s32 target; + s32 target_offset; + }; + static_assert(sizeof(Reloc) == 0x10, "CommandBuffer has incorrect size"); + + struct SyncptIncr { + u32 id; + u32 increments; + }; + static_assert(sizeof(SyncptIncr) == 0x8, "CommandBuffer has incorrect size"); + + struct Fence { + u32 id; + u32 value; + }; + static_assert(sizeof(Fence) == 0x8, "CommandBuffer has incorrect size"); + + struct IoctlGetSyncpoint { + // Input + u32_le param; + // Output + u32_le value; + }; + static_assert(sizeof(IoctlGetSyncpoint) == 8, "IocGetIdParams has wrong size"); + + struct IoctlGetWaitbase { + u32_le unknown; // seems to be ignored? Nintendo added this + u32_le value; + }; + static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size"); + + struct IoctlMapBuffer { + u32_le num_entries; + u32_le data_address; // Ignored by the driver. + u32_le attach_host_ch_das; + }; + static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size"); + + struct IocGetIdParams { + // Input + u32_le param; + // Output + u32_le value; + }; + static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); + + // Used for mapping and unmapping command buffers + struct MapBufferEntry { + u32_le map_handle; + u32_le map_address; + }; + static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size"); + + /// Ioctl command implementations + u32 SetNVMAPfd(const std::vector<u8>& input); + u32 Submit(const std::vector<u8>& input, std::vector<u8>& output); + u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output); + u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); + u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output); + u32 UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output); + u32 SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output); + + std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const; + void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated); + std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr); + + u32_le nvmap_fd{}; + u32_le submit_timeout{}; + std::shared_ptr<nvmap> nvmap_dev; + + // This is expected to be ordered, therefore we must use a map, not unordered_map + std::map<GPUVAddr, BufferMap> buffer_mappings; +}; +}; // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 9da19ad56..60db54d00 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -2,15 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cstring> - #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_vic.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { +nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) + : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} -nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {} nvhost_vic::~nvhost_vic() = default; u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -21,7 +23,7 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve switch (static_cast<IoctlCommand>(command.raw)) { case IoctlCommand::IocSetNVMAPfdCommand: - return SetNVMAPfd(input, output); + return SetNVMAPfd(input); case IoctlCommand::IocSubmit: return Submit(input, output); case IoctlCommand::IocGetSyncpoint: @@ -29,83 +31,19 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve case IoctlCommand::IocGetWaitbase: return GetWaitbase(input, output); case IoctlCommand::IocMapBuffer: - return MapBuffer(input, output); + case IoctlCommand::IocMapBuffer2: + case IoctlCommand::IocMapBuffer3: + case IoctlCommand::IocMapBuffer4: case IoctlCommand::IocMapBufferEx: return MapBuffer(input, output); + case IoctlCommand::IocUnmapBuffer: + case IoctlCommand::IocUnmapBuffer2: + case IoctlCommand::IocUnmapBuffer3: case IoctlCommand::IocUnmapBufferEx: - return UnmapBufferEx(input, output); + return UnmapBuffer(input, output); } - UNIMPLEMENTED_MSG("Unimplemented ioctl"); - return 0; -} - -u32 nvhost_vic::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlSetNvmapFD params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSetNvmapFD)); - LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); - - nvmap_fd = params.nvmap_fd; - return 0; -} - -u32 nvhost_vic::Submit(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlSubmit params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSubmit)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - - // Workaround for Luigi's Mansion 3, as nvhost_vic is not implemented for asynch GPU - params.command_buffer = {}; - - std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); - return 0; -} - -u32 nvhost_vic::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlGetSyncpoint params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); - return 0; -} - -u32 nvhost_vic::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlGetWaitbase params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlGetWaitbase)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), ¶ms, sizeof(IoctlGetWaitbase)); - return 0; -} - -u32 nvhost_vic::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlMapBuffer params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer)); - return 0; -} - -u32 nvhost_vic::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlMapBufferEx params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBufferEx)); - return 0; -} - -u32 nvhost_vic::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlUnmapBufferEx params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlUnmapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), ¶ms, sizeof(IoctlUnmapBufferEx)); + UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw); return 0; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index a7bb7bbd5..f975b190c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -4,19 +4,15 @@ #pragma once -#include <array> -#include <vector> -#include "common/common_types.h" -#include "common/swap.h" -#include "core/hle/service/nvdrv/devices/nvdevice.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { +class nvmap; -class nvhost_vic final : public nvdevice { +class nvhost_vic final : public nvhost_nvdec_common { public: - explicit nvhost_vic(Core::System& system); - ~nvhost_vic() override; - + explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + ~nvhost_vic(); u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl, IoctlVersion version) override; @@ -28,74 +24,14 @@ private: IocGetSyncpoint = 0xC0080002, IocGetWaitbase = 0xC0080003, IocMapBuffer = 0xC01C0009, + IocMapBuffer2 = 0xC0340009, + IocMapBuffer3 = 0xC0140009, + IocMapBuffer4 = 0xC00C0009, IocMapBufferEx = 0xC03C0009, - IocUnmapBufferEx = 0xC03C000A, - }; - - struct IoctlSetNvmapFD { - u32_le nvmap_fd; - }; - static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); - - struct IoctlSubmitCommandBuffer { - u32 id; - u32 offset; - u32 count; - }; - static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC, - "IoctlSubmitCommandBuffer is incorrect size"); - - struct IoctlSubmit { - u32 command_buffer_count; - u32 relocations_count; - u32 syncpt_count; - u32 wait_count; - std::array<IoctlSubmitCommandBuffer, 4> command_buffer; - }; - static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit is incorrect size"); - - struct IoctlGetSyncpoint { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetSyncpoint) == 0x8, "IoctlGetSyncpoint is incorrect size"); - - struct IoctlGetWaitbase { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size"); - - struct IoctlMapBuffer { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size"); - - struct IoctlMapBufferEx { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x30); // TODO(DarkLordZach): RE this structure + IocUnmapBuffer = 0xC03C000A, + IocUnmapBuffer2 = 0xC034000A, + IocUnmapBuffer3 = 0xC00C000A, + IocUnmapBufferEx = 0xC01C000A, }; - static_assert(sizeof(IoctlMapBufferEx) == 0x3C, "IoctlMapBufferEx is incorrect size"); - - struct IoctlUnmapBufferEx { - INSERT_PADDING_BYTES(0x3C); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlUnmapBufferEx) == 0x3C, "IoctlUnmapBufferEx is incorrect size"); - - u32_le nvmap_fd{}; - - u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); - u32 Submit(const std::vector<u8>& input, std::vector<u8>& output); - u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output); - u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); - u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output); - u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); - u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); }; - } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 84624be00..04b9ef540 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -37,6 +37,7 @@ public: VAddr addr; Status status; u32 refcount; + u32 dma_map_addr; }; std::shared_ptr<Object> GetObject(u32 handle) const { diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 197c77db0..803c1a984 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -51,9 +51,9 @@ Module::Module(Core::System& system) { devices["/dev/nvmap"] = nvmap_dev; devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); - devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system); + devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); - devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system); + devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); } Module::~Module() = default; diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index e64777668..ffbf90b00 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp @@ -202,6 +202,7 @@ SET::SET() : ServiceFramework("set") { {8, &SET::GetQuestFlag, "GetQuestFlag"}, {9, &SET::GetKeyCodeMap2, "GetKeyCodeMap2"}, {10, nullptr, "GetFirmwareVersionForDebug"}, + {11, nullptr, "GetDeviceNickName"}, }; // clang-format on diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp index 8bd4c7e79..080b5743e 100644 --- a/src/core/hle/service/set/set_sys.cpp +++ b/src/core/hle/service/set/set_sys.cpp @@ -300,6 +300,8 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") { {198, nullptr, "SetButtonConfigRegisteredSettingsEmbedded"}, {199, nullptr, "GetButtonConfigRegisteredSettings"}, {200, nullptr, "SetButtonConfigRegisteredSettings"}, + {201, nullptr, "GetFieldTestingFlag"}, + {202, nullptr, "SetFieldTestingFlag"}, }; // clang-format on diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index ee4fa4b48..7d0474e0b 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -10,6 +10,7 @@ #include "core/hle/ipc_helpers.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/service/time/interface.h" #include "core/hle/service/time/time.h" @@ -125,7 +126,7 @@ ResultCode Module::Interface::GetClockSnapshotFromSystemClockContextInternal( Kernel::Thread* thread, Clock::SystemClockContext user_context, Clock::SystemClockContext network_context, u8 type, Clock::ClockSnapshot& clock_snapshot) { - auto& time_manager{module->GetTimeManager()}; + auto& time_manager{system.GetTimeManager()}; clock_snapshot.is_automatic_correction_enabled = time_manager.GetStandardUserSystemClockCore().IsAutomaticCorrectionEnabled(); @@ -182,7 +183,7 @@ void Module::Interface::GetStandardUserSystemClock(Kernel::HLERequestContext& ct LOG_DEBUG(Service_Time, "called"); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardUserSystemClockCore(), + rb.PushIpcInterface<ISystemClock>(system.GetTimeManager().GetStandardUserSystemClockCore(), system); } @@ -190,7 +191,7 @@ void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext& LOG_DEBUG(Service_Time, "called"); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardNetworkSystemClockCore(), + rb.PushIpcInterface<ISystemClock>(system.GetTimeManager().GetStandardNetworkSystemClockCore(), system); } @@ -198,29 +199,28 @@ void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called"); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ISteadyClock>(module->GetTimeManager().GetStandardSteadyClockCore(), - system); + rb.PushIpcInterface<ISteadyClock>(system.GetTimeManager().GetStandardSteadyClockCore(), system); } void Module::Interface::GetTimeZoneService(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called"); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ITimeZoneService>(module->GetTimeManager().GetTimeZoneContentManager()); + rb.PushIpcInterface<ITimeZoneService>(system.GetTimeManager().GetTimeZoneContentManager()); } void Module::Interface::GetStandardLocalSystemClock(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called"); IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardLocalSystemClockCore(), + rb.PushIpcInterface<ISystemClock>(system.GetTimeManager().GetStandardLocalSystemClockCore(), system); } void Module::Interface::IsStandardNetworkSystemClockAccuracySufficient( Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called"); - auto& clock_core{module->GetTimeManager().GetStandardNetworkSystemClockCore()}; + auto& clock_core{system.GetTimeManager().GetStandardNetworkSystemClockCore()}; IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); rb.Push<u32>(clock_core.IsStandardNetworkSystemClockAccuracySufficient(system)); @@ -229,7 +229,7 @@ void Module::Interface::IsStandardNetworkSystemClockAccuracySufficient( void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called"); - auto& steady_clock_core{module->GetTimeManager().GetStandardSteadyClockCore()}; + auto& steady_clock_core{system.GetTimeManager().GetStandardSteadyClockCore()}; if (!steady_clock_core.IsInitialized()) { IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERROR_UNINITIALIZED_CLOCK); @@ -262,8 +262,8 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) { Clock::SystemClockContext user_context{}; if (const ResultCode result{ - module->GetTimeManager().GetStandardUserSystemClockCore().GetClockContext( - system, user_context)}; + system.GetTimeManager().GetStandardUserSystemClockCore().GetClockContext(system, + user_context)}; result.IsError()) { IPC::ResponseBuilder rb{ctx, 2}; rb.Push(result); @@ -271,7 +271,7 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) { } Clock::SystemClockContext network_context{}; if (const ResultCode result{ - module->GetTimeManager().GetStandardNetworkSystemClockCore().GetClockContext( + system.GetTimeManager().GetStandardNetworkSystemClockCore().GetClockContext( system, network_context)}; result.IsError()) { IPC::ResponseBuilder rb{ctx, 2}; @@ -372,7 +372,7 @@ void Module::Interface::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& c LOG_DEBUG(Service_Time, "called"); IPC::ResponseBuilder rb{ctx, 2, 1}; rb.Push(RESULT_SUCCESS); - rb.PushCopyObjects(module->GetTimeManager().GetSharedMemory().GetSharedMemoryHolder()); + rb.PushCopyObjects(SharedFrom(&system.Kernel().GetTimeSharedMem())); } Module::Interface::Interface(std::shared_ptr<Module> module, Core::System& system, const char* name) @@ -381,7 +381,7 @@ Module::Interface::Interface(std::shared_ptr<Module> module, Core::System& syste Module::Interface::~Interface() = default; void InstallInterfaces(Core::System& system) { - auto module{std::make_shared<Module>(system)}; + auto module{std::make_shared<Module>()}; std::make_shared<Time>(module, system, "time:a")->InstallAsService(system.ServiceManager()); std::make_shared<Time>(module, system, "time:s")->InstallAsService(system.ServiceManager()); std::make_shared<Time>(module, system, "time:u")->InstallAsService(system.ServiceManager()); diff --git a/src/core/hle/service/time/time.h b/src/core/hle/service/time/time.h index 41f3002e9..49f4aac0a 100644 --- a/src/core/hle/service/time/time.h +++ b/src/core/hle/service/time/time.h @@ -16,7 +16,7 @@ namespace Service::Time { class Module final { public: - Module(Core::System& system) : time_manager{system} {} + Module() = default; class Interface : public ServiceFramework<Interface> { public: @@ -46,13 +46,6 @@ public: std::shared_ptr<Module> module; Core::System& system; }; - - TimeManager& GetTimeManager() { - return time_manager; - } - -private: - TimeManager time_manager; }; /// Registers all Time services with the specified service manager. diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp index b4dfe45e5..858623e2b 100644 --- a/src/core/hle/service/time/time_manager.cpp +++ b/src/core/hle/service/time/time_manager.cpp @@ -22,125 +22,282 @@ static std::chrono::seconds GetSecondsSinceEpoch() { Settings::values.custom_rtc_differential; } -static s64 GetExternalTimeZoneOffset() { - // With "auto" timezone setting, we use the external system's timezone offset - if (Settings::GetTimeZoneString() == "auto") { - return Common::TimeZone::GetCurrentOffsetSeconds().count(); - } - return 0; -} - static s64 GetExternalRtcValue() { - return GetSecondsSinceEpoch().count() + GetExternalTimeZoneOffset(); -} - -TimeManager::TimeManager(Core::System& system) - : shared_memory{system}, standard_local_system_clock_core{standard_steady_clock_core}, - standard_network_system_clock_core{standard_steady_clock_core}, - standard_user_system_clock_core{standard_local_system_clock_core, - standard_network_system_clock_core, system}, - ephemeral_network_system_clock_core{tick_based_steady_clock_core}, - local_system_clock_context_writer{ - std::make_shared<Clock::LocalSystemClockContextWriter>(shared_memory)}, - network_system_clock_context_writer{ - std::make_shared<Clock::NetworkSystemClockContextWriter>(shared_memory)}, - ephemeral_network_system_clock_context_writer{ - std::make_shared<Clock::EphemeralNetworkSystemClockContextWriter>()}, - time_zone_content_manager{*this, system} { - - const auto system_time{Clock::TimeSpanType::FromSeconds(GetExternalRtcValue())}; - SetupStandardSteadyClock(system, Common::UUID::Generate(), system_time, {}, {}); - SetupStandardLocalSystemClock(system, {}, system_time.ToSeconds()); - SetupStandardNetworkSystemClock({}, standard_network_clock_accuracy); - SetupStandardUserSystemClock(system, {}, Clock::SteadyClockTimePoint::GetRandom()); - SetupEphemeralNetworkSystemClock(); + return GetSecondsSinceEpoch().count() + TimeManager::GetExternalTimeZoneOffset(); } -TimeManager::~TimeManager() = default; +struct TimeManager::Impl final { + explicit Impl(Core::System& system) + : shared_memory{system}, standard_local_system_clock_core{standard_steady_clock_core}, + standard_network_system_clock_core{standard_steady_clock_core}, + standard_user_system_clock_core{standard_local_system_clock_core, + standard_network_system_clock_core, system}, + ephemeral_network_system_clock_core{tick_based_steady_clock_core}, + local_system_clock_context_writer{ + std::make_shared<Clock::LocalSystemClockContextWriter>(shared_memory)}, + network_system_clock_context_writer{ + std::make_shared<Clock::NetworkSystemClockContextWriter>(shared_memory)}, + ephemeral_network_system_clock_context_writer{ + std::make_shared<Clock::EphemeralNetworkSystemClockContextWriter>()}, + time_zone_content_manager{system} { -void TimeManager::SetupTimeZoneManager(std::string location_name, - Clock::SteadyClockTimePoint time_zone_updated_time_point, - std::size_t total_location_name_count, - u128 time_zone_rule_version, - FileSys::VirtualFile& vfs_file) { - if (time_zone_content_manager.GetTimeZoneManager().SetDeviceLocationNameWithTimeZoneRule( - location_name, vfs_file) != RESULT_SUCCESS) { - UNREACHABLE(); - return; - } - - time_zone_content_manager.GetTimeZoneManager().SetUpdatedTime(time_zone_updated_time_point); - time_zone_content_manager.GetTimeZoneManager().SetTotalLocationNameCount( - total_location_name_count); - time_zone_content_manager.GetTimeZoneManager().SetTimeZoneRuleVersion(time_zone_rule_version); - time_zone_content_manager.GetTimeZoneManager().MarkAsInitialized(); -} - -void TimeManager::SetupStandardSteadyClock(Core::System& system, Common::UUID clock_source_id, - Clock::TimeSpanType setup_value, - Clock::TimeSpanType internal_offset, - bool is_rtc_reset_detected) { - standard_steady_clock_core.SetClockSourceId(clock_source_id); - standard_steady_clock_core.SetSetupValue(setup_value); - standard_steady_clock_core.SetInternalOffset(internal_offset); - standard_steady_clock_core.MarkAsInitialized(); - - const auto current_time_point{standard_steady_clock_core.GetCurrentRawTimePoint(system)}; - shared_memory.SetupStandardSteadyClock(system, clock_source_id, current_time_point); -} - -void TimeManager::SetupStandardLocalSystemClock(Core::System& system, - Clock::SystemClockContext clock_context, - s64 posix_time) { - standard_local_system_clock_core.SetUpdateCallbackInstance(local_system_clock_context_writer); - - const auto current_time_point{ - standard_local_system_clock_core.GetSteadyClockCore().GetCurrentTimePoint(system)}; - if (current_time_point.clock_source_id == clock_context.steady_time_point.clock_source_id) { - standard_local_system_clock_core.SetSystemClockContext(clock_context); - } else { - if (standard_local_system_clock_core.SetCurrentTime(system, posix_time) != RESULT_SUCCESS) { + const auto system_time{Clock::TimeSpanType::FromSeconds(GetExternalRtcValue())}; + SetupStandardSteadyClock(system, Common::UUID::Generate(), system_time, {}, {}); + SetupStandardLocalSystemClock(system, {}, system_time.ToSeconds()); + SetupStandardNetworkSystemClock({}, standard_network_clock_accuracy); + SetupStandardUserSystemClock(system, {}, Clock::SteadyClockTimePoint::GetRandom()); + SetupEphemeralNetworkSystemClock(); + } + + ~Impl() = default; + + Clock::StandardSteadyClockCore& GetStandardSteadyClockCore() { + return standard_steady_clock_core; + } + + const Clock::StandardSteadyClockCore& GetStandardSteadyClockCore() const { + return standard_steady_clock_core; + } + + Clock::StandardLocalSystemClockCore& GetStandardLocalSystemClockCore() { + return standard_local_system_clock_core; + } + + const Clock::StandardLocalSystemClockCore& GetStandardLocalSystemClockCore() const { + return standard_local_system_clock_core; + } + + Clock::StandardNetworkSystemClockCore& GetStandardNetworkSystemClockCore() { + return standard_network_system_clock_core; + } + + const Clock::StandardNetworkSystemClockCore& GetStandardNetworkSystemClockCore() const { + return standard_network_system_clock_core; + } + + Clock::StandardUserSystemClockCore& GetStandardUserSystemClockCore() { + return standard_user_system_clock_core; + } + + const Clock::StandardUserSystemClockCore& GetStandardUserSystemClockCore() const { + return standard_user_system_clock_core; + } + + TimeZone::TimeZoneContentManager& GetTimeZoneContentManager() { + return time_zone_content_manager; + } + + const TimeZone::TimeZoneContentManager& GetTimeZoneContentManager() const { + return time_zone_content_manager; + } + + SharedMemory& GetSharedMemory() { + return shared_memory; + } + + const SharedMemory& GetSharedMemory() const { + return shared_memory; + } + + void SetupTimeZoneManager(std::string location_name, + Clock::SteadyClockTimePoint time_zone_updated_time_point, + std::size_t total_location_name_count, u128 time_zone_rule_version, + FileSys::VirtualFile& vfs_file) { + if (time_zone_content_manager.GetTimeZoneManager().SetDeviceLocationNameWithTimeZoneRule( + location_name, vfs_file) != RESULT_SUCCESS) { UNREACHABLE(); return; } + + time_zone_content_manager.GetTimeZoneManager().SetUpdatedTime(time_zone_updated_time_point); + time_zone_content_manager.GetTimeZoneManager().SetTotalLocationNameCount( + total_location_name_count); + time_zone_content_manager.GetTimeZoneManager().SetTimeZoneRuleVersion( + time_zone_rule_version); + time_zone_content_manager.GetTimeZoneManager().MarkAsInitialized(); } - standard_local_system_clock_core.MarkAsInitialized(); -} + static s64 GetExternalTimeZoneOffset() { + // With "auto" timezone setting, we use the external system's timezone offset + if (Settings::GetTimeZoneString() == "auto") { + return Common::TimeZone::GetCurrentOffsetSeconds().count(); + } + return 0; + } -void TimeManager::SetupStandardNetworkSystemClock(Clock::SystemClockContext clock_context, - Clock::TimeSpanType sufficient_accuracy) { - standard_network_system_clock_core.SetUpdateCallbackInstance( - network_system_clock_context_writer); + void SetupStandardSteadyClock(Core::System& system, Common::UUID clock_source_id, + Clock::TimeSpanType setup_value, + Clock::TimeSpanType internal_offset, bool is_rtc_reset_detected) { + standard_steady_clock_core.SetClockSourceId(clock_source_id); + standard_steady_clock_core.SetSetupValue(setup_value); + standard_steady_clock_core.SetInternalOffset(internal_offset); + standard_steady_clock_core.MarkAsInitialized(); - if (standard_network_system_clock_core.SetSystemClockContext(clock_context) != RESULT_SUCCESS) { - UNREACHABLE(); - return; + const auto current_time_point{standard_steady_clock_core.GetCurrentRawTimePoint(system)}; + shared_memory.SetupStandardSteadyClock(system, clock_source_id, current_time_point); } - standard_network_system_clock_core.SetStandardNetworkClockSufficientAccuracy( - sufficient_accuracy); - standard_network_system_clock_core.MarkAsInitialized(); -} + void SetupStandardLocalSystemClock(Core::System& system, + Clock::SystemClockContext clock_context, s64 posix_time) { + standard_local_system_clock_core.SetUpdateCallbackInstance( + local_system_clock_context_writer); + + const auto current_time_point{ + standard_local_system_clock_core.GetSteadyClockCore().GetCurrentTimePoint(system)}; + if (current_time_point.clock_source_id == clock_context.steady_time_point.clock_source_id) { + standard_local_system_clock_core.SetSystemClockContext(clock_context); + } else { + if (standard_local_system_clock_core.SetCurrentTime(system, posix_time) != + RESULT_SUCCESS) { + UNREACHABLE(); + return; + } + } + + standard_local_system_clock_core.MarkAsInitialized(); + } + + void SetupStandardNetworkSystemClock(Clock::SystemClockContext clock_context, + Clock::TimeSpanType sufficient_accuracy) { + standard_network_system_clock_core.SetUpdateCallbackInstance( + network_system_clock_context_writer); -void TimeManager::SetupStandardUserSystemClock( - Core::System& system, bool is_automatic_correction_enabled, - Clock::SteadyClockTimePoint steady_clock_time_point) { - if (standard_user_system_clock_core.SetAutomaticCorrectionEnabled( - system, is_automatic_correction_enabled) != RESULT_SUCCESS) { - UNREACHABLE(); - return; + if (standard_network_system_clock_core.SetSystemClockContext(clock_context) != + RESULT_SUCCESS) { + UNREACHABLE(); + return; + } + + standard_network_system_clock_core.SetStandardNetworkClockSufficientAccuracy( + sufficient_accuracy); + standard_network_system_clock_core.MarkAsInitialized(); } - standard_user_system_clock_core.SetAutomaticCorrectionUpdatedTime(steady_clock_time_point); - standard_user_system_clock_core.MarkAsInitialized(); - shared_memory.SetAutomaticCorrectionEnabled(is_automatic_correction_enabled); + void SetupStandardUserSystemClock(Core::System& system, bool is_automatic_correction_enabled, + Clock::SteadyClockTimePoint steady_clock_time_point) { + if (standard_user_system_clock_core.SetAutomaticCorrectionEnabled( + system, is_automatic_correction_enabled) != RESULT_SUCCESS) { + UNREACHABLE(); + return; + } + + standard_user_system_clock_core.SetAutomaticCorrectionUpdatedTime(steady_clock_time_point); + standard_user_system_clock_core.MarkAsInitialized(); + shared_memory.SetAutomaticCorrectionEnabled(is_automatic_correction_enabled); + } + + void SetupEphemeralNetworkSystemClock() { + ephemeral_network_system_clock_core.SetUpdateCallbackInstance( + ephemeral_network_system_clock_context_writer); + ephemeral_network_system_clock_core.MarkAsInitialized(); + } + + void UpdateLocalSystemClockTime(Core::System& system, s64 posix_time) { + const auto timespan{Service::Time::Clock::TimeSpanType::FromSeconds(posix_time)}; + if (GetStandardLocalSystemClockCore() + .SetCurrentTime(system, timespan.ToSeconds()) + .IsError()) { + UNREACHABLE(); + return; + } + } + + SharedMemory shared_memory; + + Clock::StandardSteadyClockCore standard_steady_clock_core; + Clock::TickBasedSteadyClockCore tick_based_steady_clock_core; + Clock::StandardLocalSystemClockCore standard_local_system_clock_core; + Clock::StandardNetworkSystemClockCore standard_network_system_clock_core; + Clock::StandardUserSystemClockCore standard_user_system_clock_core; + Clock::EphemeralNetworkSystemClockCore ephemeral_network_system_clock_core; + + std::shared_ptr<Clock::LocalSystemClockContextWriter> local_system_clock_context_writer; + std::shared_ptr<Clock::NetworkSystemClockContextWriter> network_system_clock_context_writer; + std::shared_ptr<Clock::EphemeralNetworkSystemClockContextWriter> + ephemeral_network_system_clock_context_writer; + + TimeZone::TimeZoneContentManager time_zone_content_manager; +}; + +TimeManager::TimeManager(Core::System& system) : system{system} {} + +TimeManager::~TimeManager() = default; + +void TimeManager::Initialize() { + impl = std::make_unique<Impl>(system); + + // Time zones can only be initialized after impl is valid + impl->time_zone_content_manager.Initialize(*this); +} + +Clock::StandardSteadyClockCore& TimeManager::GetStandardSteadyClockCore() { + return impl->standard_steady_clock_core; +} + +const Clock::StandardSteadyClockCore& TimeManager::GetStandardSteadyClockCore() const { + return impl->standard_steady_clock_core; +} + +Clock::StandardLocalSystemClockCore& TimeManager::GetStandardLocalSystemClockCore() { + return impl->standard_local_system_clock_core; +} + +const Clock::StandardLocalSystemClockCore& TimeManager::GetStandardLocalSystemClockCore() const { + return impl->standard_local_system_clock_core; +} + +Clock::StandardNetworkSystemClockCore& TimeManager::GetStandardNetworkSystemClockCore() { + return impl->standard_network_system_clock_core; } -void TimeManager::SetupEphemeralNetworkSystemClock() { - ephemeral_network_system_clock_core.SetUpdateCallbackInstance( - ephemeral_network_system_clock_context_writer); - ephemeral_network_system_clock_core.MarkAsInitialized(); +const Clock::StandardNetworkSystemClockCore& TimeManager::GetStandardNetworkSystemClockCore() + const { + return impl->standard_network_system_clock_core; +} + +Clock::StandardUserSystemClockCore& TimeManager::GetStandardUserSystemClockCore() { + return impl->standard_user_system_clock_core; +} + +const Clock::StandardUserSystemClockCore& TimeManager::GetStandardUserSystemClockCore() const { + return impl->standard_user_system_clock_core; +} + +TimeZone::TimeZoneContentManager& TimeManager::GetTimeZoneContentManager() { + return impl->time_zone_content_manager; +} + +const TimeZone::TimeZoneContentManager& TimeManager::GetTimeZoneContentManager() const { + return impl->time_zone_content_manager; +} + +SharedMemory& TimeManager::GetSharedMemory() { + return impl->shared_memory; +} + +const SharedMemory& TimeManager::GetSharedMemory() const { + return impl->shared_memory; +} + +void TimeManager::UpdateLocalSystemClockTime(s64 posix_time) { + impl->UpdateLocalSystemClockTime(system, posix_time); +} + +void TimeManager::SetupTimeZoneManager(std::string location_name, + Clock::SteadyClockTimePoint time_zone_updated_time_point, + std::size_t total_location_name_count, + u128 time_zone_rule_version, + FileSys::VirtualFile& vfs_file) { + impl->SetupTimeZoneManager(location_name, time_zone_updated_time_point, + total_location_name_count, time_zone_rule_version, vfs_file); +} + +/*static*/ s64 TimeManager::GetExternalTimeZoneOffset() { + // With "auto" timezone setting, we use the external system's timezone offset + if (Settings::GetTimeZoneString() == "auto") { + return Common::TimeZone::GetCurrentOffsetSeconds().count(); + } + return 0; } } // namespace Service::Time diff --git a/src/core/hle/service/time/time_manager.h b/src/core/hle/service/time/time_manager.h index 8e65f0d22..993c7c288 100644 --- a/src/core/hle/service/time/time_manager.h +++ b/src/core/hle/service/time/time_manager.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "common/time_zone.h" #include "core/file_sys/vfs_types.h" #include "core/hle/service/time/clock_types.h" #include "core/hle/service/time/ephemeral_network_system_clock_core.h" @@ -32,86 +33,46 @@ public: explicit TimeManager(Core::System& system); ~TimeManager(); - Clock::StandardSteadyClockCore& GetStandardSteadyClockCore() { - return standard_steady_clock_core; - } + void Initialize(); - const Clock::StandardSteadyClockCore& GetStandardSteadyClockCore() const { - return standard_steady_clock_core; - } + Clock::StandardSteadyClockCore& GetStandardSteadyClockCore(); - Clock::StandardLocalSystemClockCore& GetStandardLocalSystemClockCore() { - return standard_local_system_clock_core; - } + const Clock::StandardSteadyClockCore& GetStandardSteadyClockCore() const; - const Clock::StandardLocalSystemClockCore& GetStandardLocalSystemClockCore() const { - return standard_local_system_clock_core; - } + Clock::StandardLocalSystemClockCore& GetStandardLocalSystemClockCore(); - Clock::StandardNetworkSystemClockCore& GetStandardNetworkSystemClockCore() { - return standard_network_system_clock_core; - } + const Clock::StandardLocalSystemClockCore& GetStandardLocalSystemClockCore() const; - const Clock::StandardNetworkSystemClockCore& GetStandardNetworkSystemClockCore() const { - return standard_network_system_clock_core; - } + Clock::StandardNetworkSystemClockCore& GetStandardNetworkSystemClockCore(); - Clock::StandardUserSystemClockCore& GetStandardUserSystemClockCore() { - return standard_user_system_clock_core; - } + const Clock::StandardNetworkSystemClockCore& GetStandardNetworkSystemClockCore() const; - const Clock::StandardUserSystemClockCore& GetStandardUserSystemClockCore() const { - return standard_user_system_clock_core; - } + Clock::StandardUserSystemClockCore& GetStandardUserSystemClockCore(); - TimeZone::TimeZoneContentManager& GetTimeZoneContentManager() { - return time_zone_content_manager; - } + const Clock::StandardUserSystemClockCore& GetStandardUserSystemClockCore() const; - const TimeZone::TimeZoneContentManager& GetTimeZoneContentManager() const { - return time_zone_content_manager; - } + TimeZone::TimeZoneContentManager& GetTimeZoneContentManager(); - SharedMemory& GetSharedMemory() { - return shared_memory; - } + const TimeZone::TimeZoneContentManager& GetTimeZoneContentManager() const; - const SharedMemory& GetSharedMemory() const { - return shared_memory; - } + void UpdateLocalSystemClockTime(s64 posix_time); + + SharedMemory& GetSharedMemory(); + + const SharedMemory& GetSharedMemory() const; void SetupTimeZoneManager(std::string location_name, Clock::SteadyClockTimePoint time_zone_updated_time_point, std::size_t total_location_name_count, u128 time_zone_rule_version, FileSys::VirtualFile& vfs_file); + static s64 GetExternalTimeZoneOffset(); + private: - void SetupStandardSteadyClock(Core::System& system, Common::UUID clock_source_id, - Clock::TimeSpanType setup_value, - Clock::TimeSpanType internal_offset, bool is_rtc_reset_detected); - void SetupStandardLocalSystemClock(Core::System& system, - Clock::SystemClockContext clock_context, s64 posix_time); - void SetupStandardNetworkSystemClock(Clock::SystemClockContext clock_context, - Clock::TimeSpanType sufficient_accuracy); - void SetupStandardUserSystemClock(Core::System& system, bool is_automatic_correction_enabled, - Clock::SteadyClockTimePoint steady_clock_time_point); - void SetupEphemeralNetworkSystemClock(); - - SharedMemory shared_memory; - - Clock::StandardSteadyClockCore standard_steady_clock_core; - Clock::TickBasedSteadyClockCore tick_based_steady_clock_core; - Clock::StandardLocalSystemClockCore standard_local_system_clock_core; - Clock::StandardNetworkSystemClockCore standard_network_system_clock_core; - Clock::StandardUserSystemClockCore standard_user_system_clock_core; - Clock::EphemeralNetworkSystemClockCore ephemeral_network_system_clock_core; - - std::shared_ptr<Clock::LocalSystemClockContextWriter> local_system_clock_context_writer; - std::shared_ptr<Clock::NetworkSystemClockContextWriter> network_system_clock_context_writer; - std::shared_ptr<Clock::EphemeralNetworkSystemClockContextWriter> - ephemeral_network_system_clock_context_writer; - - TimeZone::TimeZoneContentManager time_zone_content_manager; + Core::System& system; + + struct Impl; + std::unique_ptr<Impl> impl; }; } // namespace Service::Time diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp index 320672add..4177d0a41 100644 --- a/src/core/hle/service/time/time_zone_content_manager.cpp +++ b/src/core/hle/service/time/time_zone_content_manager.cpp @@ -68,9 +68,10 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) { return location_name_cache; } -TimeZoneContentManager::TimeZoneContentManager(TimeManager& time_manager, Core::System& system) - : system{system}, location_name_cache{BuildLocationNameCache(system)} { +TimeZoneContentManager::TimeZoneContentManager(Core::System& system) + : system{system}, location_name_cache{BuildLocationNameCache(system)} {} +void TimeZoneContentManager::Initialize(TimeManager& time_manager) { std::string location_name; const auto timezone_setting = Settings::GetTimeZoneString(); if (timezone_setting == "auto" || timezone_setting == "default") { diff --git a/src/core/hle/service/time/time_zone_content_manager.h b/src/core/hle/service/time/time_zone_content_manager.h index 4f302c3b9..52dd1a020 100644 --- a/src/core/hle/service/time/time_zone_content_manager.h +++ b/src/core/hle/service/time/time_zone_content_manager.h @@ -21,7 +21,9 @@ namespace Service::Time::TimeZone { class TimeZoneContentManager final { public: - TimeZoneContentManager(TimeManager& time_manager, Core::System& system); + explicit TimeZoneContentManager(Core::System& system); + + void Initialize(TimeManager& time_manager); TimeZoneManager& GetTimeZoneManager() { return time_zone_manager; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 28d3f9099..e14c02045 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -63,6 +63,7 @@ void LogSettings() { log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue()); log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); + log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); @@ -119,6 +120,7 @@ void RestoreGlobalState() { values.use_disk_shader_cache.SetGlobal(true); values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); + values.use_nvdec_emulation.SetGlobal(true); values.use_vsync.SetGlobal(true); values.use_assembly_shaders.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); diff --git a/src/core/settings.h b/src/core/settings.h index 9834f44bb..604805615 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -111,6 +111,7 @@ struct Values { Setting<bool> use_disk_shader_cache; Setting<GPUAccuracy> gpu_accuracy; Setting<bool> use_asynchronous_gpu_emulation; + Setting<bool> use_nvdec_emulation; Setting<bool> use_vsync; Setting<bool> use_assembly_shaders; Setting<bool> use_asynchronous_shaders; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index da09c0dbc..ebc19e18a 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -206,6 +206,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation.GetValue()); + AddField(field_type, "Renderer_UseNvdecEmulation", + Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders.GetValue()); diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp index c95feb0d7..b912188b6 100644 --- a/src/input_common/gcadapter/gc_adapter.cpp +++ b/src/input_common/gcadapter/gc_adapter.cpp @@ -21,26 +21,6 @@ namespace GCAdapter { -// Used to loop through and assign button in poller -constexpr std::array<PadButton, 12> PadButtonArray{ - PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, PadButton::PAD_BUTTON_DOWN, - PadButton::PAD_BUTTON_UP, PadButton::PAD_TRIGGER_Z, PadButton::PAD_TRIGGER_R, - PadButton::PAD_TRIGGER_L, PadButton::PAD_BUTTON_A, PadButton::PAD_BUTTON_B, - PadButton::PAD_BUTTON_X, PadButton::PAD_BUTTON_Y, PadButton::PAD_BUTTON_START, -}; - -static void PadToState(const GCPadStatus& pad, GCState& out_state) { - for (const auto& button : PadButtonArray) { - const auto button_key = static_cast<u16>(button); - const auto button_value = (pad.button & button_key) != 0; - out_state.buttons.insert_or_assign(static_cast<s32>(button_key), button_value); - } - - for (std::size_t i = 0; i < pad.axis_values.size(); ++i) { - out_state.axes.insert_or_assign(static_cast<u32>(i), pad.axis_values[i]); - } -} - Adapter::Adapter() { if (usb_adapter_handle != nullptr) { return; @@ -49,168 +29,263 @@ Adapter::Adapter() { const int init_res = libusb_init(&libusb_ctx); if (init_res == LIBUSB_SUCCESS) { - Setup(); + adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this); } else { LOG_ERROR(Input, "libusb could not be initialized. failed with error = {}", init_res); } } -GCPadStatus Adapter::GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload) { - GCPadStatus pad = {}; - const std::size_t offset = 1 + (9 * port); +Adapter::~Adapter() { + Reset(); +} + +void Adapter::AdapterInputThread() { + LOG_DEBUG(Input, "GC Adapter input thread started"); + s32 payload_size{}; + AdapterPayload adapter_payload{}; + + if (adapter_scan_thread.joinable()) { + adapter_scan_thread.join(); + } + + while (adapter_input_thread_running) { + libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(), + static_cast<s32>(adapter_payload.size()), &payload_size, 16); + if (IsPayloadCorrect(adapter_payload, payload_size)) { + UpdateControllers(adapter_payload); + UpdateVibrations(); + } + std::this_thread::yield(); + } - adapter_controllers_status[port] = static_cast<ControllerTypes>(adapter_payload[offset] >> 4); + if (restart_scan_thread) { + adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this); + restart_scan_thread = false; + } +} + +bool Adapter::IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size) { + if (payload_size != static_cast<s32>(adapter_payload.size()) || + adapter_payload[0] != LIBUSB_DT_HID) { + LOG_DEBUG(Input, "Error reading payload (size: {}, type: {:02x})", payload_size, + adapter_payload[0]); + if (input_error_counter++ > 20) { + LOG_ERROR(Input, "GC adapter timeout, Is the adapter connected?"); + adapter_input_thread_running = false; + restart_scan_thread = true; + } + return false; + } + + input_error_counter = 0; + return true; +} + +void Adapter::UpdateControllers(const AdapterPayload& adapter_payload) { + for (std::size_t port = 0; port < pads.size(); ++port) { + const std::size_t offset = 1 + (9 * port); + const auto type = static_cast<ControllerTypes>(adapter_payload[offset] >> 4); + UpdatePadType(port, type); + if (DeviceConnected(port)) { + const u8 b1 = adapter_payload[offset + 1]; + const u8 b2 = adapter_payload[offset + 2]; + UpdateStateButtons(port, b1, b2); + UpdateStateAxes(port, adapter_payload); + if (configuring) { + UpdateYuzuSettings(port); + } + } + } +} + +void Adapter::UpdatePadType(std::size_t port, ControllerTypes pad_type) { + if (pads[port].type == pad_type) { + return; + } + // Device changed reset device and set new type + ResetDevice(port); + pads[port].type = pad_type; +} + +void Adapter::UpdateStateButtons(std::size_t port, u8 b1, u8 b2) { + if (port >= pads.size()) { + return; + } static constexpr std::array<PadButton, 8> b1_buttons{ - PadButton::PAD_BUTTON_A, PadButton::PAD_BUTTON_B, PadButton::PAD_BUTTON_X, - PadButton::PAD_BUTTON_Y, PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, - PadButton::PAD_BUTTON_DOWN, PadButton::PAD_BUTTON_UP, + PadButton::ButtonA, PadButton::ButtonB, PadButton::ButtonX, PadButton::ButtonY, + PadButton::ButtonLeft, PadButton::ButtonRight, PadButton::ButtonDown, PadButton::ButtonUp, }; static constexpr std::array<PadButton, 4> b2_buttons{ - PadButton::PAD_BUTTON_START, - PadButton::PAD_TRIGGER_Z, - PadButton::PAD_TRIGGER_R, - PadButton::PAD_TRIGGER_L, + PadButton::ButtonStart, + PadButton::TriggerZ, + PadButton::TriggerR, + PadButton::TriggerL, }; + pads[port].buttons = 0; + for (std::size_t i = 0; i < b1_buttons.size(); ++i) { + if ((b1 & (1U << i)) != 0) { + pads[port].buttons = + static_cast<u16>(pads[port].buttons | static_cast<u16>(b1_buttons[i])); + pads[port].last_button = b1_buttons[i]; + } + } + for (std::size_t j = 0; j < b2_buttons.size(); ++j) { + if ((b2 & (1U << j)) != 0) { + pads[port].buttons = + static_cast<u16>(pads[port].buttons | static_cast<u16>(b2_buttons[j])); + pads[port].last_button = b2_buttons[j]; + } + } +} + +void Adapter::UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload) { + if (port >= pads.size()) { + return; + } + + const std::size_t offset = 1 + (9 * port); static constexpr std::array<PadAxes, 6> axes{ PadAxes::StickX, PadAxes::StickY, PadAxes::SubstickX, PadAxes::SubstickY, PadAxes::TriggerLeft, PadAxes::TriggerRight, }; - if (adapter_controllers_status[port] == ControllerTypes::None && !get_origin[port]) { - // Controller may have been disconnected, recalibrate if reconnected. - get_origin[port] = true; + for (const PadAxes axis : axes) { + const auto index = static_cast<std::size_t>(axis); + const u8 axis_value = adapter_payload[offset + 3 + index]; + if (pads[port].axis_origin[index] == 255) { + pads[port].axis_origin[index] = axis_value; + } + pads[port].axis_values[index] = + static_cast<s16>(axis_value - pads[port].axis_origin[index]); } +} - if (adapter_controllers_status[port] != ControllerTypes::None) { - const u8 b1 = adapter_payload[offset + 1]; - const u8 b2 = adapter_payload[offset + 2]; +void Adapter::UpdateYuzuSettings(std::size_t port) { + if (port >= pads.size()) { + return; + } - for (std::size_t i = 0; i < b1_buttons.size(); ++i) { - if ((b1 & (1U << i)) != 0) { - pad.button = static_cast<u16>(pad.button | static_cast<u16>(b1_buttons[i])); - } - } + constexpr u8 axis_threshold = 50; + GCPadStatus pad_status = {.port = port}; - for (std::size_t j = 0; j < b2_buttons.size(); ++j) { - if ((b2 & (1U << j)) != 0) { - pad.button = static_cast<u16>(pad.button | static_cast<u16>(b2_buttons[j])); - } - } - for (PadAxes axis : axes) { - const auto index = static_cast<std::size_t>(axis); - pad.axis_values[index] = adapter_payload[offset + 3 + index]; - } + if (pads[port].buttons != 0) { + pad_status.button = pads[port].last_button; + pad_queue.Push(pad_status); + } + + // Accounting for a threshold here to ensure an intentional press + for (std::size_t i = 0; i < pads[port].axis_values.size(); ++i) { + const s16 value = pads[port].axis_values[i]; - if (get_origin[port]) { - origin_status[port].axis_values = pad.axis_values; - get_origin[port] = false; + if (value > axis_threshold || value < -axis_threshold) { + pad_status.axis = static_cast<PadAxes>(i); + pad_status.axis_value = value; + pad_status.axis_threshold = axis_threshold; + pad_queue.Push(pad_status); } } - return pad; } -void Adapter::Read() { - LOG_DEBUG(Input, "GC Adapter Read() thread started"); +void Adapter::UpdateVibrations() { + // Use 8 states to keep the switching between on/off fast enough for + // a human to not notice the difference between switching from on/off + // More states = more rumble strengths = slower update time + constexpr u8 vibration_states = 8; - int payload_size; - std::array<u8, 37> adapter_payload; - std::array<GCPadStatus, 4> pads; - - while (adapter_thread_running) { - libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(), - sizeof(adapter_payload), &payload_size, 16); - - if (payload_size != sizeof(adapter_payload) || adapter_payload[0] != LIBUSB_DT_HID) { - LOG_ERROR(Input, - "Error reading payload (size: {}, type: {:02x}) Is the adapter connected?", - payload_size, adapter_payload[0]); - adapter_thread_running = false; // error reading from adapter, stop reading. - break; - } - for (std::size_t port = 0; port < pads.size(); ++port) { - pads[port] = GetPadStatus(port, adapter_payload); - if (DeviceConnected(port) && configuring) { - if (pads[port].button != 0) { - pad_queue[port].Push(pads[port]); - } + vibration_counter = (vibration_counter + 1) % vibration_states; - // Accounting for a threshold here to ensure an intentional press - for (size_t i = 0; i < pads[port].axis_values.size(); ++i) { - const u8 value = pads[port].axis_values[i]; - const u8 origin = origin_status[port].axis_values[i]; - - if (value > origin + pads[port].THRESHOLD || - value < origin - pads[port].THRESHOLD) { - pads[port].axis = static_cast<PadAxes>(i); - pads[port].axis_value = pads[port].axis_values[i]; - pad_queue[port].Push(pads[port]); - } - } - } - PadToState(pads[port], state[port]); - } - std::this_thread::yield(); + for (GCController& pad : pads) { + const bool vibrate = pad.rumble_amplitude > vibration_counter; + vibration_changed |= vibrate != pad.enable_vibration; + pad.enable_vibration = vibrate; } + SendVibrations(); } -void Adapter::Setup() { - // Initialize all controllers as unplugged - adapter_controllers_status.fill(ControllerTypes::None); - // Initialize all ports to store axis origin values - get_origin.fill(true); - - // pointer to list of connected usb devices - libusb_device** devices{}; - - // populate the list of devices, get the count - const ssize_t device_count = libusb_get_device_list(libusb_ctx, &devices); - if (device_count < 0) { - LOG_ERROR(Input, "libusb_get_device_list failed with error: {}", device_count); +void Adapter::SendVibrations() { + if (!rumble_enabled || !vibration_changed) { return; } - - if (devices != nullptr) { - for (std::size_t index = 0; index < static_cast<std::size_t>(device_count); ++index) { - if (CheckDeviceAccess(devices[index])) { - // GC Adapter found and accessible, registering it - GetGCEndpoint(devices[index]); - break; - } + s32 size{}; + constexpr u8 rumble_command = 0x11; + const u8 p1 = pads[0].enable_vibration; + const u8 p2 = pads[1].enable_vibration; + const u8 p3 = pads[2].enable_vibration; + const u8 p4 = pads[3].enable_vibration; + std::array<u8, 5> payload = {rumble_command, p1, p2, p3, p4}; + const int err = libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, payload.data(), + static_cast<s32>(payload.size()), &size, 16); + if (err) { + LOG_DEBUG(Input, "Adapter libusb write failed: {}", libusb_error_name(err)); + if (output_error_counter++ > 5) { + LOG_ERROR(Input, "GC adapter output timeout, Rumble disabled"); + rumble_enabled = false; } - libusb_free_device_list(devices, 1); + return; } + output_error_counter = 0; + vibration_changed = false; } -bool Adapter::CheckDeviceAccess(libusb_device* device) { - libusb_device_descriptor desc; - const int get_descriptor_error = libusb_get_device_descriptor(device, &desc); - if (get_descriptor_error) { - // could not acquire the descriptor, no point in trying to use it. - LOG_ERROR(Input, "libusb_get_device_descriptor failed with error: {}", - get_descriptor_error); - return false; +bool Adapter::RumblePlay(std::size_t port, f32 amplitude) { + amplitude = std::clamp(amplitude, 0.0f, 1.0f); + const auto raw_amp = static_cast<u8>(amplitude * 0x8); + pads[port].rumble_amplitude = raw_amp; + + return rumble_enabled; +} + +void Adapter::AdapterScanThread() { + adapter_scan_thread_running = true; + adapter_input_thread_running = false; + if (adapter_input_thread.joinable()) { + adapter_input_thread.join(); + } + ClearLibusbHandle(); + ResetDevices(); + while (adapter_scan_thread_running && !adapter_input_thread_running) { + Setup(); + std::this_thread::sleep_for(std::chrono::seconds(1)); } +} - if (desc.idVendor != 0x057e || desc.idProduct != 0x0337) { - // This isn't the device we are looking for. - return false; +void Adapter::Setup() { + usb_adapter_handle = libusb_open_device_with_vid_pid(libusb_ctx, 0x057e, 0x0337); + + if (usb_adapter_handle == NULL) { + return; + } + if (!CheckDeviceAccess()) { + ClearLibusbHandle(); + return; } - const int open_error = libusb_open(device, &usb_adapter_handle); - if (open_error == LIBUSB_ERROR_ACCESS) { - LOG_ERROR(Input, "Yuzu can not gain access to this device: ID {:04X}:{:04X}.", - desc.idVendor, desc.idProduct); - return false; + libusb_device* device = libusb_get_device(usb_adapter_handle); + + LOG_INFO(Input, "GC adapter is now connected"); + // GC Adapter found and accessible, registering it + if (GetGCEndpoint(device)) { + adapter_scan_thread_running = false; + adapter_input_thread_running = true; + rumble_enabled = true; + input_error_counter = 0; + output_error_counter = 0; + adapter_input_thread = std::thread(&Adapter::AdapterInputThread, this); } - if (open_error) { - LOG_ERROR(Input, "libusb_open failed to open device with error = {}", open_error); - return false; +} + +bool Adapter::CheckDeviceAccess() { + // This fixes payload problems from offbrand GCAdapters + const s32 control_transfer_error = + libusb_control_transfer(usb_adapter_handle, 0x21, 11, 0x0001, 0, nullptr, 0, 1000); + if (control_transfer_error < 0) { + LOG_ERROR(Input, "libusb_control_transfer failed with error= {}", control_transfer_error); } - int kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0); + s32 kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0); if (kernel_driver_error == 1) { kernel_driver_error = libusb_detach_kernel_driver(usb_adapter_handle, 0); if (kernel_driver_error != 0 && kernel_driver_error != LIBUSB_ERROR_NOT_SUPPORTED) { @@ -236,13 +311,13 @@ bool Adapter::CheckDeviceAccess(libusb_device* device) { return true; } -void Adapter::GetGCEndpoint(libusb_device* device) { +bool Adapter::GetGCEndpoint(libusb_device* device) { libusb_config_descriptor* config = nullptr; const int config_descriptor_return = libusb_get_config_descriptor(device, 0, &config); if (config_descriptor_return != LIBUSB_SUCCESS) { LOG_ERROR(Input, "libusb_get_config_descriptor failed with error = {}", config_descriptor_return); - return; + return false; } for (u8 ic = 0; ic < config->bNumInterfaces; ic++) { @@ -264,31 +339,51 @@ void Adapter::GetGCEndpoint(libusb_device* device) { unsigned char clear_payload = 0x13; libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, &clear_payload, sizeof(clear_payload), nullptr, 16); - - adapter_thread_running = true; - adapter_input_thread = std::thread(&Adapter::Read, this); + return true; } -Adapter::~Adapter() { - Reset(); -} +void Adapter::JoinThreads() { + restart_scan_thread = false; + adapter_input_thread_running = false; + adapter_scan_thread_running = false; -void Adapter::Reset() { - if (adapter_thread_running) { - adapter_thread_running = false; + if (adapter_scan_thread.joinable()) { + adapter_scan_thread.join(); } + if (adapter_input_thread.joinable()) { adapter_input_thread.join(); } +} - adapter_controllers_status.fill(ControllerTypes::None); - get_origin.fill(true); - +void Adapter::ClearLibusbHandle() { if (usb_adapter_handle) { libusb_release_interface(usb_adapter_handle, 1); libusb_close(usb_adapter_handle); usb_adapter_handle = nullptr; } +} + +void Adapter::ResetDevices() { + for (std::size_t i = 0; i < pads.size(); ++i) { + ResetDevice(i); + } +} + +void Adapter::ResetDevice(std::size_t port) { + pads[port].type = ControllerTypes::None; + pads[port].enable_vibration = false; + pads[port].rumble_amplitude = 0; + pads[port].buttons = 0; + pads[port].last_button = PadButton::Undefined; + pads[port].axis_values.fill(0); + pads[port].axis_origin.fill(255); +} + +void Adapter::Reset() { + JoinThreads(); + ClearLibusbHandle(); + ResetDevices(); if (libusb_ctx) { libusb_exit(libusb_ctx); @@ -297,11 +392,11 @@ void Adapter::Reset() { std::vector<Common::ParamPackage> Adapter::GetInputDevices() const { std::vector<Common::ParamPackage> devices; - for (std::size_t port = 0; port < state.size(); ++port) { + for (std::size_t port = 0; port < pads.size(); ++port) { if (!DeviceConnected(port)) { continue; } - std::string name = fmt::format("Gamecube Controller {}", port); + std::string name = fmt::format("Gamecube Controller {}", port + 1); devices.emplace_back(Common::ParamPackage{ {"class", "gcpad"}, {"display", std::move(name)}, @@ -318,18 +413,18 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice( // This list also excludes any button that can't be really mapped static constexpr std::array<std::pair<Settings::NativeButton::Values, PadButton>, 12> switch_to_gcadapter_button = { - std::pair{Settings::NativeButton::A, PadButton::PAD_BUTTON_A}, - {Settings::NativeButton::B, PadButton::PAD_BUTTON_B}, - {Settings::NativeButton::X, PadButton::PAD_BUTTON_X}, - {Settings::NativeButton::Y, PadButton::PAD_BUTTON_Y}, - {Settings::NativeButton::Plus, PadButton::PAD_BUTTON_START}, - {Settings::NativeButton::DLeft, PadButton::PAD_BUTTON_LEFT}, - {Settings::NativeButton::DUp, PadButton::PAD_BUTTON_UP}, - {Settings::NativeButton::DRight, PadButton::PAD_BUTTON_RIGHT}, - {Settings::NativeButton::DDown, PadButton::PAD_BUTTON_DOWN}, - {Settings::NativeButton::SL, PadButton::PAD_TRIGGER_L}, - {Settings::NativeButton::SR, PadButton::PAD_TRIGGER_R}, - {Settings::NativeButton::R, PadButton::PAD_TRIGGER_Z}, + std::pair{Settings::NativeButton::A, PadButton::ButtonA}, + {Settings::NativeButton::B, PadButton::ButtonB}, + {Settings::NativeButton::X, PadButton::ButtonX}, + {Settings::NativeButton::Y, PadButton::ButtonY}, + {Settings::NativeButton::Plus, PadButton::ButtonStart}, + {Settings::NativeButton::DLeft, PadButton::ButtonLeft}, + {Settings::NativeButton::DUp, PadButton::ButtonUp}, + {Settings::NativeButton::DRight, PadButton::ButtonRight}, + {Settings::NativeButton::DDown, PadButton::ButtonDown}, + {Settings::NativeButton::SL, PadButton::TriggerL}, + {Settings::NativeButton::SR, PadButton::TriggerR}, + {Settings::NativeButton::R, PadButton::TriggerZ}, }; if (!params.Has("port")) { return {}; @@ -352,8 +447,10 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice( for (const auto& [switch_button, gcadapter_axis] : switch_to_gcadapter_axis) { Common::ParamPackage button_params({{"engine", "gcpad"}}); button_params.Set("port", params.Get("port", 0)); - button_params.Set("button", static_cast<int>(PadButton::PAD_STICK)); - button_params.Set("axis", static_cast<int>(gcadapter_axis)); + button_params.Set("button", static_cast<s32>(PadButton::Stick)); + button_params.Set("axis", static_cast<s32>(gcadapter_axis)); + button_params.Set("threshold", 0.5f); + button_params.Set("direction", "+"); mapping.insert_or_assign(switch_button, std::move(button_params)); } return mapping; @@ -382,46 +479,33 @@ InputCommon::AnalogMapping Adapter::GetAnalogMappingForDevice( } bool Adapter::DeviceConnected(std::size_t port) const { - return adapter_controllers_status[port] != ControllerTypes::None; -} - -void Adapter::ResetDeviceType(std::size_t port) { - adapter_controllers_status[port] = ControllerTypes::None; + return pads[port].type != ControllerTypes::None; } void Adapter::BeginConfiguration() { - get_origin.fill(true); - for (auto& pq : pad_queue) { - pq.Clear(); - } + pad_queue.Clear(); configuring = true; } void Adapter::EndConfiguration() { - for (auto& pq : pad_queue) { - pq.Clear(); - } + pad_queue.Clear(); configuring = false; } -std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() { +Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() { return pad_queue; } -const std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() const { +const Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() const { return pad_queue; } -std::array<GCState, 4>& Adapter::GetPadState() { - return state; -} - -const std::array<GCState, 4>& Adapter::GetPadState() const { - return state; +GCController& Adapter::GetPadState(std::size_t port) { + return pads.at(port); } -int Adapter::GetOriginValue(u32 port, u32 axis) const { - return origin_status[port].axis_values[axis]; +const GCController& Adapter::GetPadState(std::size_t port) const { + return pads.at(port); } } // namespace GCAdapter diff --git a/src/input_common/gcadapter/gc_adapter.h b/src/input_common/gcadapter/gc_adapter.h index 4f5f3de8e..d28dcfad3 100644 --- a/src/input_common/gcadapter/gc_adapter.h +++ b/src/input_common/gcadapter/gc_adapter.h @@ -19,24 +19,23 @@ struct libusb_device_handle; namespace GCAdapter { enum class PadButton { - PAD_BUTTON_LEFT = 0x0001, - PAD_BUTTON_RIGHT = 0x0002, - PAD_BUTTON_DOWN = 0x0004, - PAD_BUTTON_UP = 0x0008, - PAD_TRIGGER_Z = 0x0010, - PAD_TRIGGER_R = 0x0020, - PAD_TRIGGER_L = 0x0040, - PAD_BUTTON_A = 0x0100, - PAD_BUTTON_B = 0x0200, - PAD_BUTTON_X = 0x0400, - PAD_BUTTON_Y = 0x0800, - PAD_BUTTON_START = 0x1000, + Undefined = 0x0000, + ButtonLeft = 0x0001, + ButtonRight = 0x0002, + ButtonDown = 0x0004, + ButtonUp = 0x0008, + TriggerZ = 0x0010, + TriggerR = 0x0020, + TriggerL = 0x0040, + ButtonA = 0x0100, + ButtonB = 0x0200, + ButtonX = 0x0400, + ButtonY = 0x0800, + ButtonStart = 0x1000, // Below is for compatibility with "AxisButton" type - PAD_STICK = 0x2000, + Stick = 0x2000, }; -extern const std::array<PadButton, 12> PadButtonArray; - enum class PadAxes : u8 { StickX, StickY, @@ -47,87 +46,122 @@ enum class PadAxes : u8 { Undefined, }; +enum class ControllerTypes { + None, + Wired, + Wireless, +}; + struct GCPadStatus { - u16 button{}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits + std::size_t port{}; - std::array<u8, 6> axis_values{}; // Triggers and sticks, following indices defined in PadAxes - static constexpr u8 THRESHOLD = 50; // Threshold for axis press for polling + PadButton button{PadButton::Undefined}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits - u8 port{}; PadAxes axis{PadAxes::Undefined}; - u8 axis_value{255}; + s16 axis_value{}; + u8 axis_threshold{50}; }; -struct GCState { - std::unordered_map<int, bool> buttons; - std::unordered_map<u32, u16> axes; +struct GCController { + ControllerTypes type{}; + bool enable_vibration{}; + u8 rumble_amplitude{}; + u16 buttons{}; + PadButton last_button{}; + std::array<s16, 6> axis_values{}; + std::array<u8, 6> axis_origin{}; }; -enum class ControllerTypes { None, Wired, Wireless }; - class Adapter { public: - /// Initialize the GC Adapter capture and read sequence Adapter(); - - /// Close the adapter read thread and release the adapter ~Adapter(); + + /// Request a vibration for a controlelr + bool RumblePlay(std::size_t port, f32 amplitude); + /// Used for polling void BeginConfiguration(); void EndConfiguration(); + Common::SPSCQueue<GCPadStatus>& GetPadQueue(); + const Common::SPSCQueue<GCPadStatus>& GetPadQueue() const; + + GCController& GetPadState(std::size_t port); + const GCController& GetPadState(std::size_t port) const; + + /// Returns true if there is a device connected to port + bool DeviceConnected(std::size_t port) const; + + /// Used for automapping features std::vector<Common::ParamPackage> GetInputDevices() const; InputCommon::ButtonMapping GetButtonMappingForDevice(const Common::ParamPackage& params) const; InputCommon::AnalogMapping GetAnalogMappingForDevice(const Common::ParamPackage& params) const; - /// Returns true if there is a device connected to port - bool DeviceConnected(std::size_t port) const; +private: + using AdapterPayload = std::array<u8, 37>; - std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue(); - const std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue() const; + void UpdatePadType(std::size_t port, ControllerTypes pad_type); + void UpdateControllers(const AdapterPayload& adapter_payload); + void UpdateYuzuSettings(std::size_t port); + void UpdateStateButtons(std::size_t port, u8 b1, u8 b2); + void UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload); + void UpdateVibrations(); - std::array<GCState, 4>& GetPadState(); - const std::array<GCState, 4>& GetPadState() const; + void AdapterInputThread(); - int GetOriginValue(u32 port, u32 axis) const; + void AdapterScanThread(); -private: - GCPadStatus GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload); + bool IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size); + + // Updates vibration state of all controllers + void SendVibrations(); + + /// For use in initialization, querying devices to find the adapter + void Setup(); - void Read(); + /// Resets status of all GC controller devices to a disconected state + void ResetDevices(); - /// Resets status of device connected to port - void ResetDeviceType(std::size_t port); + /// Resets status of device connected to a disconected state + void ResetDevice(std::size_t port); /// Returns true if we successfully gain access to GC Adapter - bool CheckDeviceAccess(libusb_device* device); + bool CheckDeviceAccess(); - /// Captures GC Adapter endpoint address, - void GetGCEndpoint(libusb_device* device); + /// Captures GC Adapter endpoint address + /// Returns true if the endpoind was set correctly + bool GetGCEndpoint(libusb_device* device); /// For shutting down, clear all data, join all threads, release usb void Reset(); - /// For use in initialization, querying devices to find the adapter - void Setup(); + // Join all threads + void JoinThreads(); + + // Release usb handles + void ClearLibusbHandle(); libusb_device_handle* usb_adapter_handle = nullptr; + std::array<GCController, 4> pads; + Common::SPSCQueue<GCPadStatus> pad_queue; std::thread adapter_input_thread; - bool adapter_thread_running; + std::thread adapter_scan_thread; + bool adapter_input_thread_running; + bool adapter_scan_thread_running; + bool restart_scan_thread; libusb_context* libusb_ctx; - u8 input_endpoint = 0; - u8 output_endpoint = 0; - - bool configuring = false; + u8 input_endpoint{0}; + u8 output_endpoint{0}; + u8 input_error_counter{0}; + u8 output_error_counter{0}; + int vibration_counter{0}; - std::array<GCState, 4> state; - std::array<bool, 4> get_origin; - std::array<GCPadStatus, 4> origin_status; - std::array<Common::SPSCQueue<GCPadStatus>, 4> pad_queue; - std::array<ControllerTypes, 4> adapter_controllers_status{}; + bool configuring{false}; + bool rumble_enabled{true}; + bool vibration_changed{true}; }; - } // namespace GCAdapter diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp index 893556916..6bd6f57fc 100644 --- a/src/input_common/gcadapter/gc_poller.cpp +++ b/src/input_common/gcadapter/gc_poller.cpp @@ -15,22 +15,30 @@ namespace InputCommon { class GCButton final : public Input::ButtonDevice { public: - explicit GCButton(u32 port_, int button_, const GCAdapter::Adapter* adapter) + explicit GCButton(u32 port_, s32 button_, GCAdapter::Adapter* adapter) : port(port_), button(button_), gcadapter(adapter) {} ~GCButton() override; bool GetStatus() const override { if (gcadapter->DeviceConnected(port)) { - return gcadapter->GetPadState()[port].buttons.at(button); + return (gcadapter->GetPadState(port).buttons & button) != 0; } return false; } + bool SetRumblePlay(f32 amp_high, f32 amp_low, f32 freq_high, f32 freq_low) const override { + const float amplitude = amp_high + amp_low > 2.0f ? 1.0f : (amp_high + amp_low) * 0.5f; + const auto new_amp = + static_cast<f32>(pow(amplitude, 0.5f) * (3.0f - 2.0f * pow(amplitude, 0.15f))); + + return gcadapter->RumblePlay(port, new_amp); + } + private: const u32 port; - const int button; - const GCAdapter::Adapter* gcadapter; + const s32 button; + GCAdapter::Adapter* gcadapter; }; class GCAxisButton final : public Input::ButtonDevice { @@ -38,13 +46,12 @@ public: explicit GCAxisButton(u32 port_, u32 axis_, float threshold_, bool trigger_if_greater_, const GCAdapter::Adapter* adapter) : port(port_), axis(axis_), threshold(threshold_), trigger_if_greater(trigger_if_greater_), - gcadapter(adapter), - origin_value(static_cast<float>(adapter->GetOriginValue(port_, axis_))) {} + gcadapter(adapter) {} bool GetStatus() const override { if (gcadapter->DeviceConnected(port)) { - const float current_axis_value = gcadapter->GetPadState()[port].axes.at(axis); - const float axis_value = (current_axis_value - origin_value) / 128.0f; + const float current_axis_value = gcadapter->GetPadState(port).axis_values.at(axis); + const float axis_value = current_axis_value / 128.0f; if (trigger_if_greater) { // TODO: Might be worthwile to set a slider for the trigger threshold. It is // currently always set to 0.5 in configure_input_player.cpp ZL/ZR HandleClick @@ -61,7 +68,6 @@ private: float threshold; bool trigger_if_greater; const GCAdapter::Adapter* gcadapter; - const float origin_value; }; GCButtonFactory::GCButtonFactory(std::shared_ptr<GCAdapter::Adapter> adapter_) @@ -73,7 +79,7 @@ std::unique_ptr<Input::ButtonDevice> GCButtonFactory::Create(const Common::Param const auto button_id = params.Get("button", 0); const auto port = static_cast<u32>(params.Get("port", 0)); - constexpr int PAD_STICK_ID = static_cast<u16>(GCAdapter::PadButton::PAD_STICK); + constexpr s32 PAD_STICK_ID = static_cast<s32>(GCAdapter::PadButton::Stick); // button is not an axis/stick button if (button_id != PAD_STICK_ID) { @@ -106,32 +112,25 @@ Common::ParamPackage GCButtonFactory::GetNextInput() const { Common::ParamPackage params; GCAdapter::GCPadStatus pad; auto& queue = adapter->GetPadQueue(); - for (std::size_t port = 0; port < queue.size(); ++port) { - while (queue[port].Pop(pad)) { - // This while loop will break on the earliest detected button - params.Set("engine", "gcpad"); - params.Set("port", static_cast<int>(port)); - for (const auto& button : GCAdapter::PadButtonArray) { - const u16 button_value = static_cast<u16>(button); - if (pad.button & button_value) { - params.Set("button", button_value); - break; - } - } + while (queue.Pop(pad)) { + // This while loop will break on the earliest detected button + params.Set("engine", "gcpad"); + params.Set("port", static_cast<s32>(pad.port)); + if (pad.button != GCAdapter::PadButton::Undefined) { + params.Set("button", static_cast<u16>(pad.button)); + } - // For Axis button implementation - if (pad.axis != GCAdapter::PadAxes::Undefined) { - params.Set("axis", static_cast<u8>(pad.axis)); - params.Set("button", static_cast<u16>(GCAdapter::PadButton::PAD_STICK)); - if (pad.axis_value > 128) { - params.Set("direction", "+"); - params.Set("threshold", "0.25"); - } else { - params.Set("direction", "-"); - params.Set("threshold", "-0.25"); - } - break; + // For Axis button implementation + if (pad.axis != GCAdapter::PadAxes::Undefined) { + params.Set("axis", static_cast<u8>(pad.axis)); + params.Set("button", static_cast<u16>(GCAdapter::PadButton::Stick)); + params.Set("threshold", "0.25"); + if (pad.axis_value > 0) { + params.Set("direction", "+"); + } else { + params.Set("direction", "-"); } + break; } } return params; @@ -152,17 +151,14 @@ public: explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, const GCAdapter::Adapter* adapter, float range_) : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter), - origin_value_x(static_cast<float>(adapter->GetOriginValue(port_, axis_x_))), - origin_value_y(static_cast<float>(adapter->GetOriginValue(port_, axis_y_))), range(range_) {} float GetAxis(u32 axis) const { if (gcadapter->DeviceConnected(port)) { std::lock_guard lock{mutex}; - const auto origin_value = axis % 2 == 0 ? origin_value_x : origin_value_y; const auto axis_value = - static_cast<float>(gcadapter->GetPadState()[port].axes.at(axis)); - return (axis_value - origin_value) / (100.0f * range); + static_cast<float>(gcadapter->GetPadState(port).axis_values.at(axis)); + return (axis_value) / (100.0f * range); } return 0.0f; } @@ -215,8 +211,6 @@ private: const u32 axis_y; const float deadzone; const GCAdapter::Adapter* gcadapter; - const float origin_value_x; - const float origin_value_y; const float range; mutable std::mutex mutex; }; @@ -254,26 +248,44 @@ void GCAnalogFactory::EndConfiguration() { Common::ParamPackage GCAnalogFactory::GetNextInput() { GCAdapter::GCPadStatus pad; + Common::ParamPackage params; auto& queue = adapter->GetPadQueue(); - for (std::size_t port = 0; port < queue.size(); ++port) { - while (queue[port].Pop(pad)) { - if (pad.axis == GCAdapter::PadAxes::Undefined || - std::abs((static_cast<float>(pad.axis_value) - 128.0f) / 128.0f) < 0.1f) { - continue; - } - // An analog device needs two axes, so we need to store the axis for later and wait for - // a second input event. The axes also must be from the same joystick. - const u8 axis = static_cast<u8>(pad.axis); - if (analog_x_axis == -1) { - analog_x_axis = axis; - controller_number = static_cast<int>(port); - } else if (analog_y_axis == -1 && analog_x_axis != axis && - controller_number == static_cast<int>(port)) { - analog_y_axis = axis; - } + while (queue.Pop(pad)) { + if (pad.button != GCAdapter::PadButton::Undefined) { + params.Set("engine", "gcpad"); + params.Set("port", static_cast<s32>(pad.port)); + params.Set("button", static_cast<u16>(pad.button)); + return params; + } + if (pad.axis == GCAdapter::PadAxes::Undefined || + std::abs(static_cast<float>(pad.axis_value) / 128.0f) < 0.1f) { + continue; + } + // An analog device needs two axes, so we need to store the axis for later and wait for + // a second input event. The axes also must be from the same joystick. + const u8 axis = static_cast<u8>(pad.axis); + if (axis == 0 || axis == 1) { + analog_x_axis = 0; + analog_y_axis = 1; + controller_number = static_cast<s32>(pad.port); + break; + } + if (axis == 2 || axis == 3) { + analog_x_axis = 2; + analog_y_axis = 3; + controller_number = static_cast<s32>(pad.port); + break; + } + + if (analog_x_axis == -1) { + analog_x_axis = axis; + controller_number = static_cast<s32>(pad.port); + } else if (analog_y_axis == -1 && analog_x_axis != axis && + controller_number == static_cast<s32>(pad.port)) { + analog_y_axis = axis; + break; } } - Common::ParamPackage params; if (analog_x_axis != -1 && analog_y_axis != -1) { params.Set("engine", "gcpad"); params.Set("port", controller_number); diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp index 9c3035920..10883e2d9 100644 --- a/src/input_common/sdl/sdl_impl.cpp +++ b/src/input_common/sdl/sdl_impl.cpp @@ -155,15 +155,15 @@ public: return sdl_joystick.get(); } - void SetSDLJoystick(SDL_Joystick* joystick, SDL_GameController* controller) { - sdl_controller.reset(controller); - sdl_joystick.reset(joystick); - } - SDL_GameController* GetSDLGameController() const { return sdl_controller.get(); } + void SetSDLJoystick(SDL_Joystick* joystick, SDL_GameController* controller) { + sdl_joystick.reset(joystick); + sdl_controller.reset(controller); + } + private: struct State { std::unordered_map<int, bool> buttons; @@ -186,69 +186,58 @@ private: std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) { std::lock_guard lock{joystick_map_mutex}; const auto it = joystick_map.find(guid); + if (it != joystick_map.end()) { while (it->second.size() <= static_cast<std::size_t>(port)) { auto joystick = std::make_shared<SDLJoystick>(guid, static_cast<int>(it->second.size()), nullptr, nullptr); it->second.emplace_back(std::move(joystick)); } + return it->second[static_cast<std::size_t>(port)]; } + auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, nullptr); + return joystick_map[guid].emplace_back(std::move(joystick)); } std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) { auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id); - auto sdl_controller = SDL_GameControllerFromInstanceID(sdl_id); const std::string guid = GetGUID(sdl_joystick); std::lock_guard lock{joystick_map_mutex}; const auto map_it = joystick_map.find(guid); - if (map_it != joystick_map.end()) { - const auto vec_it = - std::find_if(map_it->second.begin(), map_it->second.end(), - [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) { - return sdl_joystick == joystick->GetSDLJoystick(); - }); - if (vec_it != map_it->second.end()) { - // This is the common case: There is already an existing SDL_Joystick mapped to a - // SDLJoystick. return the SDLJoystick - return *vec_it; - } - // Search for a SDLJoystick without a mapped SDL_Joystick... - const auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(), - [](const std::shared_ptr<SDLJoystick>& joystick) { - return joystick->GetSDLJoystick() == nullptr; - }); - if (nullptr_it != map_it->second.end()) { - // ... and map it - (*nullptr_it)->SetSDLJoystick(sdl_joystick, sdl_controller); - return *nullptr_it; - } + if (map_it == joystick_map.end()) { + return nullptr; + } - // There is no SDLJoystick without a mapped SDL_Joystick - // Create a new SDLJoystick - const int port = static_cast<int>(map_it->second.size()); - auto joystick = std::make_shared<SDLJoystick>(guid, port, sdl_joystick, sdl_controller); - return map_it->second.emplace_back(std::move(joystick)); + const auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(), + [&sdl_joystick](const auto& joystick) { + return joystick->GetSDLJoystick() == sdl_joystick; + }); + + if (vec_it == map_it->second.end()) { + return nullptr; } - auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick, sdl_controller); - return joystick_map[guid].emplace_back(std::move(joystick)); + return *vec_it; } void SDLState::InitJoystick(int joystick_index) { SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index); SDL_GameController* sdl_gamecontroller = nullptr; + if (SDL_IsGameController(joystick_index)) { sdl_gamecontroller = SDL_GameControllerOpen(joystick_index); } + if (!sdl_joystick) { LOG_ERROR(Input, "Failed to open joystick {}", joystick_index); return; } + const std::string guid = GetGUID(sdl_joystick); std::lock_guard lock{joystick_map_mutex}; @@ -257,14 +246,17 @@ void SDLState::InitJoystick(int joystick_index) { joystick_map[guid].emplace_back(std::move(joystick)); return; } + auto& joystick_guid_list = joystick_map[guid]; - const auto it = std::find_if( - joystick_guid_list.begin(), joystick_guid_list.end(), - [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); }); - if (it != joystick_guid_list.end()) { - (*it)->SetSDLJoystick(sdl_joystick, sdl_gamecontroller); + const auto joystick_it = + std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(), + [](const auto& joystick) { return !joystick->GetSDLJoystick(); }); + + if (joystick_it != joystick_guid_list.end()) { + (*joystick_it)->SetSDLJoystick(sdl_joystick, sdl_gamecontroller); return; } + const int port = static_cast<int>(joystick_guid_list.size()); auto joystick = std::make_shared<SDLJoystick>(guid, port, sdl_joystick, sdl_gamecontroller); joystick_guid_list.emplace_back(std::move(joystick)); @@ -274,18 +266,14 @@ void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) { const std::string guid = GetGUID(sdl_joystick); std::lock_guard lock{joystick_map_mutex}; - auto& joystick_guid_list = joystick_map[guid]; - auto joystick_it = std::find_if( - joystick_guid_list.begin(), joystick_guid_list.end(), - [&sdl_joystick](auto& joystick) { return joystick->GetSDLJoystick() == sdl_joystick; }); - - if (joystick_it != joystick_guid_list.end()) { - (*joystick_it)->SetSDLJoystick(nullptr, nullptr); - joystick_guid_list.erase(joystick_it); - if (joystick_guid_list.empty()) { - joystick_map.erase(guid); - } - } + // This call to guid is safe since the joystick is guaranteed to be in the map + const auto& joystick_guid_list = joystick_map[guid]; + const auto joystick_it = std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(), + [&sdl_joystick](const auto& joystick) { + return joystick->GetSDLJoystick() == sdl_joystick; + }); + + (*joystick_it)->SetSDLJoystick(nullptr, nullptr); } void SDLState::HandleGameControllerEvent(const SDL_Event& event) { @@ -720,8 +708,7 @@ std::vector<Common::ParamPackage> SDLState::GetInputDevices() { std::vector<Common::ParamPackage> devices; for (const auto& [key, value] : joystick_map) { for (const auto& joystick : value) { - auto* joy = joystick->GetSDLJoystick(); - if (auto* controller = joystick->GetSDLGameController()) { + if (auto* const controller = joystick->GetSDLGameController()) { std::string name = fmt::format("{} {}", SDL_GameControllerName(controller), joystick->GetPort()); devices.emplace_back(Common::ParamPackage{ @@ -730,7 +717,7 @@ std::vector<Common::ParamPackage> SDLState::GetInputDevices() { {"guid", joystick->GetGUID()}, {"port", std::to_string(joystick->GetPort())}, }); - } else if (joy) { + } else if (auto* const joy = joystick->GetSDLJoystick()) { std::string name = fmt::format("{} {}", SDL_JoystickName(joy), joystick->GetPort()); devices.emplace_back(Common::ParamPackage{ {"class", "sdl"}, @@ -797,21 +784,27 @@ Common::ParamPackage BuildHatParamPackageForButton(int port, std::string guid, s Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event) { switch (event.type) { case SDL_JOYAXISMOTION: { - const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which); - return BuildAnalogParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), - static_cast<s32>(event.jaxis.axis), - event.jaxis.value); + if (const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which)) { + return BuildAnalogParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), + static_cast<s32>(event.jaxis.axis), + event.jaxis.value); + } + break; } case SDL_JOYBUTTONUP: { - const auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which); - return BuildButtonParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), - static_cast<s32>(event.jbutton.button)); + if (const auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which)) { + return BuildButtonParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), + static_cast<s32>(event.jbutton.button)); + } + break; } case SDL_JOYHATMOTION: { - const auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which); - return BuildHatParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), - static_cast<s32>(event.jhat.hat), - static_cast<s32>(event.jhat.value)); + if (const auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which)) { + return BuildHatParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), + static_cast<s32>(event.jhat.hat), + static_cast<s32>(event.jhat.value)); + } + break; } } return {}; @@ -820,21 +813,27 @@ Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Eve Common::ParamPackage SDLEventToMotionParamPackage(SDLState& state, const SDL_Event& event) { switch (event.type) { case SDL_JOYAXISMOTION: { - const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which); - return BuildAnalogParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), - static_cast<s32>(event.jaxis.axis), - event.jaxis.value); + if (const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which)) { + return BuildAnalogParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), + static_cast<s32>(event.jaxis.axis), + event.jaxis.value); + } + break; } case SDL_JOYBUTTONUP: { - const auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which); - return BuildButtonParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), - static_cast<s32>(event.jbutton.button)); + if (const auto joystick = state.GetSDLJoystickBySDLID(event.jbutton.which)) { + return BuildButtonParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), + static_cast<s32>(event.jbutton.button)); + } + break; } case SDL_JOYHATMOTION: { - const auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which); - return BuildHatParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), - static_cast<s32>(event.jhat.hat), - static_cast<s32>(event.jhat.value)); + if (const auto joystick = state.GetSDLJoystickBySDLID(event.jhat.which)) { + return BuildHatParamPackageForButton(joystick->GetPort(), joystick->GetGUID(), + static_cast<s32>(event.jhat.hat), + static_cast<s32>(event.jhat.value)); + } + break; } } return {}; @@ -1062,9 +1061,8 @@ public: // Simplify controller config by testing if game controller support is enabled. if (event.type == SDL_JOYAXISMOTION) { const auto axis = event.jaxis.axis; - const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which); - auto* const controller = joystick->GetSDLGameController(); - if (controller) { + if (const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which); + auto* const controller = joystick->GetSDLGameController()) { const auto axis_left_x = SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_LEFTX) .value.axis; @@ -1098,12 +1096,13 @@ public: } if (analog_x_axis != -1 && analog_y_axis != -1) { - const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which); - auto params = BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(), - analog_x_axis, analog_y_axis); - analog_x_axis = -1; - analog_y_axis = -1; - return params; + if (const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which)) { + auto params = BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(), + analog_x_axis, analog_y_axis); + analog_x_axis = -1; + analog_y_axis = -1; + return params; + } } return {}; } diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index 4fd92428f..4757dd2b4 100644 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp @@ -6,18 +6,40 @@ #include <cstdlib> #include <functional> #include <memory> +#include <mutex> +#include <stdexcept> #include <thread> #include <unordered_map> #include <vector> #include <catch2/catch.hpp> -#include <math.h> + #include "common/common_types.h" #include "common/fiber.h" -#include "common/spin_lock.h" namespace Common { +class ThreadIds { +public: + void Register(u32 id) { + const auto thread_id = std::this_thread::get_id(); + std::scoped_lock lock{mutex}; + if (ids.contains(thread_id)) { + throw std::logic_error{"Registering the same thread twice"}; + } + ids.emplace(thread_id, id); + } + + [[nodiscard]] u32 Get() const { + std::scoped_lock lock{mutex}; + return ids.at(std::this_thread::get_id()); + } + +private: + mutable std::mutex mutex; + std::unordered_map<std::thread::id, u32> ids; +}; + class TestControl1 { public: TestControl1() = default; @@ -26,7 +48,7 @@ public: void ExecuteThread(u32 id); - std::unordered_map<std::thread::id, u32> ids; + ThreadIds thread_ids; std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; std::vector<std::shared_ptr<Common::Fiber>> work_fibers; std::vector<u32> items; @@ -39,8 +61,7 @@ static void WorkControl1(void* control) { } void TestControl1::DoWork() { - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); u32 value = items[id]; for (u32 i = 0; i < id; i++) { value++; @@ -50,8 +71,7 @@ void TestControl1::DoWork() { } void TestControl1::ExecuteThread(u32 id) { - std::thread::id this_id = std::this_thread::get_id(); - ids[this_id] = id; + thread_ids.Register(id); auto thread_fiber = Fiber::ThreadToFiber(); thread_fibers[id] = thread_fiber; work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this); @@ -98,8 +118,7 @@ public: value1 += i; } Fiber::YieldTo(fiber1, fiber3); - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); assert1 = id == 1; value2 += 5000; Fiber::YieldTo(fiber1, thread_fibers[id]); @@ -115,8 +134,7 @@ public: } void DoWork3() { - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); assert2 = id == 0; value1 += 1000; Fiber::YieldTo(fiber3, thread_fibers[id]); @@ -125,14 +143,12 @@ public: void ExecuteThread(u32 id); void CallFiber1() { - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); Fiber::YieldTo(thread_fibers[id], fiber1); } void CallFiber2() { - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); Fiber::YieldTo(thread_fibers[id], fiber2); } @@ -145,7 +161,7 @@ public: u32 value2{}; std::atomic<bool> trap{true}; std::atomic<bool> trap2{true}; - std::unordered_map<std::thread::id, u32> ids; + ThreadIds thread_ids; std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; std::shared_ptr<Common::Fiber> fiber1; std::shared_ptr<Common::Fiber> fiber2; @@ -168,15 +184,13 @@ static void WorkControl2_3(void* control) { } void TestControl2::ExecuteThread(u32 id) { - std::thread::id this_id = std::this_thread::get_id(); - ids[this_id] = id; + thread_ids.Register(id); auto thread_fiber = Fiber::ThreadToFiber(); thread_fibers[id] = thread_fiber; } void TestControl2::Exit() { - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); thread_fibers[id]->Exit(); } @@ -228,24 +242,21 @@ public: void DoWork1() { value1 += 1; Fiber::YieldTo(fiber1, fiber2); - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); value3 += 1; Fiber::YieldTo(fiber1, thread_fibers[id]); } void DoWork2() { value2 += 1; - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); Fiber::YieldTo(fiber2, thread_fibers[id]); } void ExecuteThread(u32 id); void CallFiber1() { - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); Fiber::YieldTo(thread_fibers[id], fiber1); } @@ -254,7 +265,7 @@ public: u32 value1{}; u32 value2{}; u32 value3{}; - std::unordered_map<std::thread::id, u32> ids; + ThreadIds thread_ids; std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; std::shared_ptr<Common::Fiber> fiber1; std::shared_ptr<Common::Fiber> fiber2; @@ -271,15 +282,13 @@ static void WorkControl3_2(void* control) { } void TestControl3::ExecuteThread(u32 id) { - std::thread::id this_id = std::this_thread::get_id(); - ids[this_id] = id; + thread_ids.Register(id); auto thread_fiber = Fiber::ThreadToFiber(); thread_fibers[id] = thread_fiber; } void TestControl3::Exit() { - std::thread::id this_id = std::this_thread::get_id(); - u32 id = ids[this_id]; + const u32 id = thread_ids.Get(); thread_fibers[id]->Exit(); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 77ebac19f..abcee2a1c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -5,6 +5,24 @@ add_library(video_core STATIC buffer_cache/buffer_cache.h buffer_cache/map_interval.cpp buffer_cache/map_interval.h + cdma_pusher.cpp + cdma_pusher.h + command_classes/codecs/codec.cpp + command_classes/codecs/codec.h + command_classes/codecs/h264.cpp + command_classes/codecs/h264.h + command_classes/codecs/vp9.cpp + command_classes/codecs/vp9.h + command_classes/codecs/vp9_types.h + command_classes/host1x.cpp + command_classes/host1x.h + command_classes/nvdec.cpp + command_classes/nvdec.h + command_classes/nvdec_common.h + command_classes/sync_manager.cpp + command_classes/sync_manager.h + command_classes/vic.cpp + command_classes/vic.h compatible_formats.cpp compatible_formats.h dirty_flags.cpp @@ -250,6 +268,14 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PRIVATE glad xbyak) +if (MSVC) + target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR}) + target_link_libraries(video_core PUBLIC ${FFMPEG_LIBRARY_DIR}/swscale.lib ${FFMPEG_LIBRARY_DIR}/avcodec.lib ${FFMPEG_LIBRARY_DIR}/avutil.lib) +else() + target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR}) + target_link_libraries(video_core PRIVATE ${FFMPEG_LIBRARIES}) +endif() + add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) @@ -276,7 +302,10 @@ else() target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion + -Werror=pessimizing-move + -Werror=redundant-move -Werror=switch + -Werror=type-limits -Werror=unused-variable $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess> diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp new file mode 100644 index 000000000..b60f86260 --- /dev/null +++ b/src/video_core/cdma_pusher.cpp @@ -0,0 +1,171 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include "command_classes/host1x.h" +#include "command_classes/nvdec.h" +#include "command_classes/vic.h" +#include "common/bit_util.h" +#include "video_core/cdma_pusher.h" +#include "video_core/command_classes/nvdec_common.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra { +CDmaPusher::CDmaPusher(GPU& gpu) + : gpu(gpu), nvdec_processor(std::make_shared<Nvdec>(gpu)), + vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), + host1x_processor(std::make_unique<Host1x>(gpu)), + nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), + vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {} + +CDmaPusher::~CDmaPusher() = default; + +void CDmaPusher::Push(ChCommandHeaderList&& entries) { + cdma_queue.push(std::move(entries)); +} + +void CDmaPusher::DispatchCalls() { + while (!cdma_queue.empty()) { + Step(); + } +} + +void CDmaPusher::Step() { + const auto entries{cdma_queue.front()}; + cdma_queue.pop(); + + std::vector<u32> values(entries.size()); + std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32)); + + for (const u32 value : values) { + if (mask != 0) { + const u32 lbs = Common::CountTrailingZeroes32(mask); + mask &= ~(1U << lbs); + ExecuteCommand(static_cast<u32>(offset + lbs), value); + continue; + } else if (count != 0) { + --count; + ExecuteCommand(static_cast<u32>(offset), value); + if (incrementing) { + ++offset; + } + continue; + } + const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf); + switch (mode) { + case ChSubmissionMode::SetClass: { + mask = value & 0x3f; + offset = (value >> 16) & 0xfff; + current_class = static_cast<ChClassId>((value >> 6) & 0x3ff); + break; + } + case ChSubmissionMode::Incrementing: + case ChSubmissionMode::NonIncrementing: + count = value & 0xffff; + offset = (value >> 16) & 0xfff; + incrementing = mode == ChSubmissionMode::Incrementing; + break; + case ChSubmissionMode::Mask: + mask = value & 0xffff; + offset = (value >> 16) & 0xfff; + break; + case ChSubmissionMode::Immediate: { + const u32 data = value & 0xfff; + offset = (value >> 16) & 0xfff; + ExecuteCommand(static_cast<u32>(offset), data); + break; + } + default: + UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode)); + break; + } + } +} + +void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { + switch (current_class) { + case ChClassId::NvDec: + ThiStateWrite(nvdec_thi_state, offset, {data}); + switch (static_cast<ThiMethod>(offset)) { + case ThiMethod::IncSyncpt: { + LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method"); + const auto syncpoint_id = static_cast<u32>(data & 0xFF); + const auto cond = static_cast<u32>((data >> 8) & 0xFF); + if (cond == 0) { + nvdec_sync->Increment(syncpoint_id); + } else { + nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); + nvdec_sync->SignalDone(syncpoint_id); + } + break; + } + case ThiMethod::SetMethod1: + LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", + static_cast<u32>(nvdec_thi_state.method_0)); + nvdec_processor->ProcessMethod( + static_cast<Tegra::Nvdec::Method>(nvdec_thi_state.method_0), {data}); + break; + default: + break; + } + break; + case ChClassId::GraphicsVic: + ThiStateWrite(vic_thi_state, static_cast<u32>(offset), {data}); + switch (static_cast<ThiMethod>(offset)) { + case ThiMethod::IncSyncpt: { + LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method"); + const auto syncpoint_id = static_cast<u32>(data & 0xFF); + const auto cond = static_cast<u32>((data >> 8) & 0xFF); + if (cond == 0) { + vic_sync->Increment(syncpoint_id); + } else { + vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); + vic_sync->SignalDone(syncpoint_id); + } + break; + } + case ThiMethod::SetMethod1: + LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", + static_cast<u32>(vic_thi_state.method_0), data); + vic_processor->ProcessMethod(static_cast<Tegra::Vic::Method>(vic_thi_state.method_0), + {data}); + break; + default: + break; + } + break; + case ChClassId::Host1x: + // This device is mainly for syncpoint synchronization + LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); + host1x_processor->ProcessMethod(static_cast<Tegra::Host1x::Method>(offset), {data}); + break; + default: + UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class)); + break; + } +} + +void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments) { + u8* const state_offset = reinterpret_cast<u8*>(&state) + sizeof(u32) * offset; + std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size()); +} + +} // namespace Tegra diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h new file mode 100644 index 000000000..982f309c5 --- /dev/null +++ b/src/video_core/cdma_pusher.h @@ -0,0 +1,138 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <unordered_map> +#include <vector> +#include <queue> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/command_classes/sync_manager.h" + +namespace Tegra { + +class GPU; +class Nvdec; +class Vic; +class Host1x; + +enum class ChSubmissionMode : u32 { + SetClass = 0, + Incrementing = 1, + NonIncrementing = 2, + Mask = 3, + Immediate = 4, + Restart = 5, + Gather = 6, +}; + +enum class ChClassId : u32 { + NoClass = 0x0, + Host1x = 0x1, + VideoEncodeMpeg = 0x20, + VideoEncodeNvEnc = 0x21, + VideoStreamingVi = 0x30, + VideoStreamingIsp = 0x32, + VideoStreamingIspB = 0x34, + VideoStreamingViI2c = 0x36, + GraphicsVic = 0x5d, + Graphics3D = 0x60, + GraphicsGpu = 0x61, + Tsec = 0xe0, + TsecB = 0xe1, + NvJpg = 0xc0, + NvDec = 0xf0 +}; + +enum class ChMethod : u32 { + Empty = 0, + SetMethod = 0x10, + SetData = 0x11, +}; + +union ChCommandHeader { + u32 raw; + BitField<0, 16, u32> value; + BitField<16, 12, ChMethod> method_offset; + BitField<28, 4, ChSubmissionMode> submission_mode; +}; +static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size"); + +struct ChCommand { + ChClassId class_id{}; + int method_offset{}; + std::vector<u32> arguments; +}; + +using ChCommandHeaderList = std::vector<Tegra::ChCommandHeader>; +using ChCommandList = std::vector<Tegra::ChCommand>; + +struct ThiRegisters { + u32_le increment_syncpt{}; + INSERT_PADDING_WORDS(1); + u32_le increment_syncpt_error{}; + u32_le ctx_switch_incremement_syncpt{}; + INSERT_PADDING_WORDS(4); + u32_le ctx_switch{}; + INSERT_PADDING_WORDS(1); + u32_le ctx_syncpt_eof{}; + INSERT_PADDING_WORDS(5); + u32_le method_0{}; + u32_le method_1{}; + INSERT_PADDING_WORDS(12); + u32_le int_status{}; + u32_le int_mask{}; +}; + +enum class ThiMethod : u32 { + IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32), + SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32), + SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32), +}; + +class CDmaPusher { +public: + explicit CDmaPusher(GPU& gpu); + ~CDmaPusher(); + + /// Push NVDEC command buffer entries into queue + void Push(ChCommandHeaderList&& entries); + + /// Process queued command buffer entries + void DispatchCalls(); + + /// Process one queue element + void Step(); + + /// Invoke command class devices to execute the command based on the current state + void ExecuteCommand(u32 offset, u32 data); + +private: + /// Write arguments value to the ThiRegisters member at the specified offset + void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments); + + GPU& gpu; + + std::shared_ptr<Tegra::Nvdec> nvdec_processor; + std::unique_ptr<Tegra::Vic> vic_processor; + std::unique_ptr<Tegra::Host1x> host1x_processor; + std::unique_ptr<SyncptIncrManager> nvdec_sync; + std::unique_ptr<SyncptIncrManager> vic_sync; + ChClassId current_class{}; + ThiRegisters vic_thi_state{}; + ThiRegisters nvdec_thi_state{}; + + s32 count{}; + s32 offset{}; + s32 mask{}; + bool incrementing{}; + + // Queue of command lists to be processed + std::queue<ChCommandHeaderList> cdma_queue; +}; + +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp new file mode 100644 index 000000000..1adf3cd13 --- /dev/null +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -0,0 +1,115 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include <fstream> +#include <vector> +#include "common/assert.h" +#include "video_core/command_classes/codecs/codec.h" +#include "video_core/command_classes/codecs/h264.h" +#include "video_core/command_classes/codecs/vp9.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +extern "C" { +#include <libavutil/opt.h> +} + +namespace Tegra { + +Codec::Codec(GPU& gpu_) + : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), + vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} + +Codec::~Codec() { + if (!initialized) { + return; + } + // Free libav memory + avcodec_send_packet(av_codec_ctx, nullptr); + avcodec_receive_frame(av_codec_ctx, av_frame); + avcodec_flush_buffers(av_codec_ctx); + + av_frame_unref(av_frame); + av_free(av_frame); + avcodec_close(av_codec_ctx); +} + +void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); + current_codec = codec; +} + +void Codec::StateWrite(u32 offset, u64 arguments) { + u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); + std::memcpy(state_offset, &arguments, sizeof(u64)); +} + +void Codec::Decode() { + bool is_first_frame = false; + + if (!initialized) { + if (current_codec == NvdecCommon::VideoCodec::H264) { + av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); + } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { + av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); + } else { + LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast<u32>(current_codec)); + return; + } + + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_frame = av_frame_alloc(); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + av_frame_unref(av_frame); + av_free(av_frame); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + is_first_frame = true; + } + bool vp9_hidden_frame = false; + + AVPacket packet{}; + av_init_packet(&packet); + std::vector<u8> frame_data; + + if (current_codec == NvdecCommon::VideoCodec::H264) { + frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame); + } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { + frame_data = vp9_decoder->ComposeFrameHeader(state); + vp9_hidden_frame = vp9_decoder->WasFrameHidden(); + } + + packet.data = frame_data.data(); + packet.size = static_cast<int>(frame_data.size()); + + avcodec_send_packet(av_codec_ctx, &packet); + + if (!vp9_hidden_frame) { + // Only receive/store visible frames + avcodec_receive_frame(av_codec_ctx, av_frame); + } +} + +AVFrame* Codec::GetCurrentFrame() { + return av_frame; +} + +const AVFrame* Codec::GetCurrentFrame() const { + return av_frame; +} + +NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { + return current_codec; +} + +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h new file mode 100644 index 000000000..cb67094f6 --- /dev/null +++ b/src/video_core/command_classes/codecs/codec.h @@ -0,0 +1,66 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include "common/common_types.h" +#include "video_core/command_classes/nvdec_common.h" + +extern "C" { +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic ignored "-Wconversion" +#endif +#include <libavcodec/avcodec.h> +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif +} + +namespace Tegra { +class GPU; +struct VicRegisters; + +namespace Decoder { +class H264; +class VP9; +} // namespace Decoder + +class Codec { +public: + explicit Codec(GPU& gpu); + ~Codec(); + + /// Sets NVDEC video stream codec + void SetTargetCodec(NvdecCommon::VideoCodec codec); + + /// Populate NvdecRegisters state with argument value at the provided offset + void StateWrite(u32 offset, u64 arguments); + + /// Call decoders to construct headers, decode AVFrame with ffmpeg + void Decode(); + + /// Returns most recently decoded frame + AVFrame* GetCurrentFrame(); + const AVFrame* GetCurrentFrame() const; + + /// Returns the value of current_codec + NvdecCommon::VideoCodec GetCurrentCodec() const; + +private: + bool initialized{}; + NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; + + AVCodec* av_codec{nullptr}; + AVCodecContext* av_codec_ctx{nullptr}; + AVFrame* av_frame{nullptr}; + + GPU& gpu; + std::unique_ptr<Decoder::H264> h264_decoder; + std::unique_ptr<Decoder::VP9> vp9_decoder; + + NvdecCommon::NvdecRegisters state{}; +}; + +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp new file mode 100644 index 000000000..549a40f52 --- /dev/null +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -0,0 +1,292 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include <array> +#include "common/bit_util.h" +#include "video_core/command_classes/codecs/h264.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra::Decoder { +namespace { +// ZigZag LUTs from libavcodec. +constexpr std::array<u8, 64> zig_zag_direct{ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, + 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, + 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, +}; + +constexpr std::array<u8, 16> zig_zag_scan{ + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, +}; +} // Anonymous namespace + +H264::H264(GPU& gpu_) : gpu(gpu_) {} + +H264::~H264() = default; + +std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) { + H264DecoderContext context{}; + gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); + + const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); + if (!is_first_frame && frame_number != 0) { + frame.resize(context.frame_data_size); + + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); + } else { + /// Encode header + H264BitWriter writer{}; + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + const auto chroma_format_idc = + static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); + writer.WriteUe(chroma_format_idc); + if (chroma_format_idc == 3) { + writer.WriteBit(false); + } + + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag + writer.WriteBit(false); // Scaling matrix present flag + + const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); + writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); + writer.WriteUe(order_cnt_type); + if (order_cnt_type == 0) { + writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); + } else if (order_cnt_type == 1) { + writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); + + writer.WriteSe(0); + writer.WriteSe(0); + writer.WriteUe(0); + } + + const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / + (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); + writer.WriteUe(pic_height - 1); + writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); + + if (!context.h264_parameter_set.frame_mbs_only_flag) { + writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); + } + + writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // H264 PPS + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); + writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); + writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); + writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); + s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); + pic_init_qp = (pic_init_qp << 26) >> 26; + writer.WriteSe(pic_init_qp); + writer.WriteSe(0); + s32 chroma_qp_index_offset = + static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); + chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; + + writer.WriteSe(chroma_qp_index_offset); + writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); + writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); + writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); + writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + + writer.WriteBit(true); + + for (s32 index = 0; index < 6; index++) { + writer.WriteBit(true); + const auto matrix_x4 = + std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); + writer.WriteScalingList(matrix_x4, index * 16, 16); + } + + if (context.h264_parameter_set.transform_8x8_mode_flag) { + for (s32 index = 0; index < 2; index++) { + writer.WriteBit(true); + const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), + context.scaling_matrix_8.end()); + + writer.WriteScalingList(matrix_x8, index * 64, 64); + } + } + + s32 chroma_qp_index_offset2 = + static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); + chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; + + writer.WriteSe(chroma_qp_index_offset2); + + writer.End(); + + const auto& encoded_header = writer.GetByteArray(); + frame.resize(encoded_header.size() + context.frame_data_size); + std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); + + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, + frame.data() + encoded_header.size(), + context.frame_data_size); + } + + return frame; +} + +H264BitWriter::H264BitWriter() = default; + +H264BitWriter::~H264BitWriter() = default; + +void H264BitWriter::WriteU(s32 value, s32 value_sz) { + WriteBits(value, value_sz); +} + +void H264BitWriter::WriteSe(s32 value) { + WriteExpGolombCodedInt(value); +} + +void H264BitWriter::WriteUe(u32 value) { + WriteExpGolombCodedUInt(value); +} + +void H264BitWriter::End() { + WriteBit(true); + Flush(); +} + +void H264BitWriter::WriteBit(bool state) { + WriteBits(state ? 1 : 0, 1); +} + +void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { + std::vector<u8> scan(count); + if (count == 16) { + std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); + } else { + std::memcpy(scan.data(), zig_zag_direct.data(), scan.size()); + } + u8 last_scale = 8; + + for (s32 index = 0; index < count; index++) { + const u8 value = list[start + scan[index]]; + const s32 delta_scale = static_cast<s32>(value - last_scale); + + WriteSe(delta_scale); + + last_scale = value; + } +} + +std::vector<u8>& H264BitWriter::GetByteArray() { + return byte_array; +} + +const std::vector<u8>& H264BitWriter::GetByteArray() const { + return byte_array; +} + +void H264BitWriter::WriteBits(s32 value, s32 bit_count) { + s32 value_pos = 0; + + s32 remaining = bit_count; + + while (remaining > 0) { + s32 copy_size = remaining; + + const s32 free_bits = GetFreeBufferBits(); + + if (copy_size > free_bits) { + copy_size = free_bits; + } + + const s32 mask = (1 << copy_size) - 1; + + const s32 src_shift = (bit_count - value_pos) - copy_size; + const s32 dst_shift = (buffer_size - buffer_pos) - copy_size; + + buffer |= ((value >> src_shift) & mask) << dst_shift; + + value_pos += copy_size; + buffer_pos += copy_size; + remaining -= copy_size; + } +} + +void H264BitWriter::WriteExpGolombCodedInt(s32 value) { + const s32 sign = value <= 0 ? 0 : 1; + if (value < 0) { + value = -value; + } + value = (value << 1) - sign; + WriteExpGolombCodedUInt(value); +} + +void H264BitWriter::WriteExpGolombCodedUInt(u32 value) { + const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1)); + WriteBits(1, size); + + value -= (1U << (size - 1)) - 1; + WriteBits(static_cast<s32>(value), size - 1); +} + +s32 H264BitWriter::GetFreeBufferBits() { + if (buffer_pos == buffer_size) { + Flush(); + } + + return buffer_size - buffer_pos; +} + +void H264BitWriter::Flush() { + if (buffer_pos == 0) { + return; + } + byte_array.push_back(static_cast<u8>(buffer)); + + buffer = 0; + buffer_pos = 0; +} +} // namespace Tegra::Decoder diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h new file mode 100644 index 000000000..f2292fd2f --- /dev/null +++ b/src/video_core/command_classes/codecs/h264.h @@ -0,0 +1,118 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#pragma once + +#include <vector> +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "video_core/command_classes/nvdec_common.h" + +namespace Tegra { +class GPU; +namespace Decoder { + +class H264BitWriter { +public: + H264BitWriter(); + ~H264BitWriter(); + + /// The following Write methods are based on clause 9.1 in the H.264 specification. + /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax + void WriteU(s32 value, s32 value_sz); + void WriteSe(s32 value); + void WriteUe(u32 value); + + /// Finalize the bitstream + void End(); + + /// append a bit to the stream, equivalent value to the state parameter + void WriteBit(bool state); + + /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification + /// Writes the scaling matrices of the sream + void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); + + /// Return the bitstream as a vector. + std::vector<u8>& GetByteArray(); + const std::vector<u8>& GetByteArray() const; + +private: + void WriteBits(s32 value, s32 bit_count); + void WriteExpGolombCodedInt(s32 value); + void WriteExpGolombCodedUInt(u32 value); + s32 GetFreeBufferBits(); + void Flush(); + + s32 buffer_size{8}; + + s32 buffer{}; + s32 buffer_pos{}; + std::vector<u8> byte_array; +}; + +class H264 { +public: + explicit H264(GPU& gpu); + ~H264(); + + /// Compose the H264 header of the frame for FFmpeg decoding + std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, + bool is_first_frame = false); + +private: + struct H264ParameterSet { + u32 log2_max_pic_order_cnt{}; + u32 delta_pic_order_always_zero_flag{}; + u32 frame_mbs_only_flag{}; + u32 pic_width_in_mbs{}; + u32 pic_height_in_map_units{}; + INSERT_PADDING_WORDS(1); + u32 entropy_coding_mode_flag{}; + u32 bottom_field_pic_order_flag{}; + u32 num_refidx_l0_default_active{}; + u32 num_refidx_l1_default_active{}; + u32 deblocking_filter_control_flag{}; + u32 redundant_pic_count_flag{}; + u32 transform_8x8_mode_flag{}; + INSERT_PADDING_WORDS(9); + u64 flags{}; + u32 frame_number{}; + u32 frame_number2{}; + }; + static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); + + struct H264DecoderContext { + INSERT_PADDING_BYTES(0x48); + u32 frame_data_size{}; + INSERT_PADDING_BYTES(0xc); + H264ParameterSet h264_parameter_set{}; + INSERT_PADDING_BYTES(0x100); + std::array<u8, 0x60> scaling_matrix_4; + std::array<u8, 0x80> scaling_matrix_8; + }; + static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); + + std::vector<u8> frame; + GPU& gpu; +}; + +} // namespace Decoder +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp new file mode 100644 index 000000000..b3e98aa9f --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -0,0 +1,1034 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> // for std::memcpy +#include <numeric> +#include "video_core/command_classes/codecs/vp9.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra::Decoder { +namespace { +// Default compressed header probabilities once frame context resets +constexpr Vp9EntropyProbs default_probs{ + .y_mode_prob{ + 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78, + 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29, + }, + .partition_prob{ + 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0, + 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0, + 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0, + 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0, + }, + .coef_probs{ + 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0, 17, 82, 140, 0, 8, 66, 114, 0, + 2, 44, 76, 0, 1, 19, 32, 0, 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0, + 7, 75, 127, 0, 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0, + 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0, 102, 148, 228, 0, + 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0, 2, 39, 75, 0, 1, 15, 29, 0, + 156, 57, 233, 0, 119, 57, 212, 0, 58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 81, 0, + 3, 12, 31, 0, 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0, 8, 93, 157, 0, + 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0, 41, 151, 213, 0, 27, 123, 193, 0, + 3, 82, 144, 0, 1, 58, 105, 0, 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0, + 23, 126, 198, 0, 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0, + 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0, 1, 33, 65, 0, + 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0, 61, 49, 166, 0, 28, 36, 114, 0, + 12, 25, 76, 0, 3, 16, 42, 0, 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0, + 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0, 89, 163, 230, 0, + 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0, 2, 42, 81, 0, 1, 17, 33, 0, + 108, 167, 237, 0, 55, 133, 222, 0, 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0, + 1, 19, 38, 0, 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0, + 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0, 66, 58, 182, 0, + 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0, 229, 99, 249, 0, 143, 111, 235, 0, + 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0, + 94, 146, 224, 0, 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0, + 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0, 1, 41, 79, 0, + 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0, 10, 104, 178, 0, 2, 73, 133, 0, + 1, 44, 85, 0, 1, 22, 47, 0, 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0, + 3, 61, 124, 0, 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0, + 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0, 125, 34, 187, 0, + 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 37, 109, 153, 0, 51, 102, 147, 0, 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0, + 1, 19, 29, 0, 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0, + 1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0, 2, 93, 148, 0, + 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0, 29, 176, 217, 0, 12, 145, 201, 0, + 3, 101, 156, 0, 1, 69, 111, 0, 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0, + 25, 154, 215, 0, 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0, + 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0, 14, 117, 177, 0, 5, 90, 141, 0, + 2, 61, 95, 0, 1, 37, 57, 0, 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0, + 1, 60, 104, 0, 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0, + 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0, 32, 186, 224, 0, + 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 22, 0, + 57, 192, 227, 0, 20, 143, 204, 0, 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0, + 1, 19, 32, 0, 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0, 49, 107, 178, 0, + 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0, 71, 172, 217, 0, 44, 141, 209, 0, + 15, 102, 173, 0, 6, 76, 133, 0, 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0, + 31, 148, 216, 0, 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0, + 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0, 1, 38, 69, 0, + 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0, 7, 107, 177, 0, 2, 70, 124, 0, + 1, 42, 73, 0, 1, 18, 34, 0, 225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0, + 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0, 75, 183, 239, 0, + 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0, 1, 44, 76, 0, 1, 17, 28, 0, + 73, 185, 240, 0, 27, 159, 222, 0, 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0, + 1, 17, 29, 0, 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0, + 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0, 4, 113, 180, 0, + 1, 76, 129, 0, 1, 46, 80, 0, 1, 23, 41, 0, 7, 27, 153, 0, 5, 30, 95, 0, + 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0, + 57, 75, 124, 0, 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0, + 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0, 1, 38, 60, 0, + 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0, 2, 75, 117, 0, 1, 50, 81, 0, + 1, 31, 51, 0, 1, 14, 23, 0, 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0, + 1, 51, 86, 0, 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0, + 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0, 19, 55, 240, 0, + 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 41, 166, 207, 0, 104, 153, 199, 0, 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0, + 1, 36, 52, 0, 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0, + 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0, 1, 86, 142, 0, + 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0, 20, 190, 215, 0, 4, 135, 192, 0, + 1, 84, 139, 0, 1, 53, 91, 0, 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0, + 2, 137, 192, 0, 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0, + 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0, 46, 102, 164, 0, 15, 80, 128, 0, + 2, 49, 76, 0, 1, 18, 28, 0, 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0, + 3, 69, 109, 0, 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0, + 4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0, 47, 199, 217, 0, + 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0, 1, 36, 62, 0, 1, 15, 26, 0, + 26, 219, 229, 0, 5, 155, 207, 0, 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0, + 1, 16, 28, 0, 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0, 63, 142, 204, 0, + 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0, 85, 181, 230, 0, 32, 146, 209, 0, + 7, 100, 164, 0, 3, 71, 121, 0, 1, 45, 77, 0, 1, 18, 30, 0, 65, 187, 230, 0, + 20, 148, 207, 0, 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0, + 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0, 1, 39, 66, 0, + 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0, 1, 98, 160, 0, 1, 67, 117, 0, + 1, 41, 74, 0, 1, 17, 31, 0, 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0, + 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0, 50, 127, 154, 0, + 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0, 1, 35, 54, 0, 1, 13, 20, 0, + 40, 142, 167, 0, 17, 110, 157, 0, 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0, + 1, 11, 17, 0, 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0, + 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0, 1, 83, 128, 0, + 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0, 36, 41, 235, 0, 29, 36, 193, 0, + 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0, + 177, 162, 215, 0, 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0, + 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0, 1, 38, 65, 0, + 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0, 2, 86, 140, 0, 1, 56, 97, 0, + 1, 36, 61, 0, 1, 16, 27, 0, 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0, + 1, 57, 99, 0, 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0, + 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0, 181, 21, 201, 0, + 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 47, 106, 172, 0, 95, 104, 173, 0, 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0, + 1, 17, 23, 0, 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0, + 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0, 3, 93, 146, 0, + 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0, 49, 186, 223, 0, 17, 148, 204, 0, + 1, 96, 142, 0, 1, 53, 83, 0, 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0, + 2, 136, 180, 0, 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0, + 197, 13, 247, 0, 82, 17, 222, 0, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0, 176, 177, 234, 0, 104, 158, 220, 0, + 66, 128, 186, 0, 55, 90, 137, 0, 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0, + 2, 65, 125, 0, 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0, + 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0, 84, 220, 246, 0, + 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0, 1, 55, 77, 0, 1, 60, 79, 0, + 43, 243, 240, 0, 8, 180, 217, 0, 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0, + 1, 16, 6, 0, + }, + .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144}, + .inter_mode_prob{ + 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94, + 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0, + }, + .intra_inter_prob{9, 102, 187, 225}, + .comp_inter_prob{9, 102, 187, 225, 0}, + .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247}, + .comp_ref_prob{50, 126, 123, 221, 226}, + .tx_32x32_prob{3, 136, 37, 5, 52, 13}, + .tx_16x16_prob{20, 152, 15, 101}, + .tx_8x8_prob{100, 66}, + .skip_probs{192, 128, 64}, + .joints{32, 64, 96}, + .sign{128, 128}, + .classes{ + 224, 144, 192, 168, 192, 176, 192, 198, 198, 245, + 216, 128, 176, 160, 176, 176, 192, 198, 198, 208, + }, + .class_0{216, 208}, + .prob_bits{ + 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, + 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, + }, + .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64}, + .fr{64, 96, 64, 64, 96, 64}, + .class_0_hp{160, 160}, + .high_precision{128, 128}, +}; + +constexpr std::array<s32, 256> norm_lut{ + 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +constexpr std::array<s32, 254> map_lut{ + 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, + 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, + 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, + 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, + 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 19, +}; +} // Anonymous namespace + +VP9::VP9(GPU& gpu) : gpu(gpu) {} + +VP9::~VP9() = default; + +void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { + const bool update = new_prob != old_prob; + + writer.Write(update, diff_update_probability); + + if (update) { + WriteProbabilityDelta(writer, new_prob, old_prob); + } +} +template <typename T, std::size_t N> +void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, + const std::array<T, N>& old_prob) { + for (std::size_t offset = 0; offset < new_prob.size(); ++offset) { + WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]); + } +} + +template <typename T, std::size_t N> +void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, + const std::array<T, N>& old_prob) { + for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) { + WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]); + WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]); + WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]); + } +} + +void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { + const int delta = RemapProbability(new_prob, old_prob); + + EncodeTermSubExp(writer, delta); +} + +s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) { + new_prob--; + old_prob--; + + std::size_t index{}; + + if (old_prob * 2 <= 0xff) { + index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); + } else { + index = static_cast<std::size_t>( + std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); + } + + return map_lut[index]; +} + +s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) { + if (new_prob > old_prob * 2) { + return new_prob; + } else if (new_prob >= old_prob) { + return (new_prob - old_prob) * 2; + } else { + return (old_prob - new_prob) * 2 - 1; + } +} + +void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) { + if (WriteLessThan(writer, value, 16)) { + writer.Write(value, 4); + } else if (WriteLessThan(writer, value, 32)) { + writer.Write(value - 16, 4); + } else if (WriteLessThan(writer, value, 64)) { + writer.Write(value - 32, 5); + } else { + value -= 64; + + constexpr s32 size = 8; + + const s32 mask = (1 << size) - 191; + + const s32 delta = value - mask; + + if (delta < 0) { + writer.Write(value, size - 1); + } else { + writer.Write(delta / 2 + mask, size - 1); + writer.Write(delta & 1, 1); + } + } +} + +bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) { + const bool is_lt = value < test; + writer.Write(!is_lt); + return is_lt; +} + +void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, + const std::array<u8, 2304>& new_prob, + const std::array<u8, 2304>& old_prob) { + // Note: There's 1 byte added on each packet for alignment, + // this byte is ignored when doing updates. + constexpr s32 block_bytes = 2 * 2 * 6 * 6 * 4; + + const auto needs_update = [&](s32 base_index) -> bool { + s32 index = base_index; + for (s32 i = 0; i < 2; i++) { + for (s32 j = 0; j < 2; j++) { + for (s32 k = 0; k < 6; k++) { + for (s32 l = 0; l < 6; l++) { + if (new_prob[index + 0] != old_prob[index + 0] || + new_prob[index + 1] != old_prob[index + 1] || + new_prob[index + 2] != old_prob[index + 2]) { + return true; + } + + index += 4; + } + } + } + } + return false; + }; + + for (s32 block_index = 0; block_index < 4; block_index++) { + const s32 base_index = block_index * block_bytes; + const bool update = needs_update(base_index); + writer.Write(update); + + if (update) { + s32 index = base_index; + for (s32 i = 0; i < 2; i++) { + for (s32 j = 0; j < 2; j++) { + for (s32 k = 0; k < 6; k++) { + for (s32 l = 0; l < 6; l++) { + if (k != 0 || l < 3) { + WriteProbabilityUpdate(writer, new_prob[index + 0], + old_prob[index + 0]); + WriteProbabilityUpdate(writer, new_prob[index + 1], + old_prob[index + 1]); + WriteProbabilityUpdate(writer, new_prob[index + 2], + old_prob[index + 2]); + } + index += 4; + } + } + } + } + } + + if (block_index == tx_mode) { + break; + } + } +} + +void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { + const bool update = new_prob != old_prob; + writer.Write(update, diff_update_probability); + + if (update) { + writer.Write(new_prob >> 1, 7); + } +} + +s32 VP9::CalcMinLog2TileCols(s32 frame_width) { + const s32 sb64_cols = (frame_width + 63) / 64; + s32 min_log2 = 0; + + while ((64 << min_log2) < sb64_cols) { + min_log2++; + } + + return min_log2; +} + +s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) { + const s32 sb64_cols = (frameWidth + 63) / 64; + s32 max_log2 = 1; + + while ((sb64_cols >> max_log2) >= 4) { + max_log2++; + } + + return max_log2 - 1; +} + +Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { + PictureInfo picture_info{}; + gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); + Vp9PictureInfo vp9_info = picture_info.Convert(); + + InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); + + // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following + // order: last, golden, altref, current. It may be worthwhile to track the updates done here + // to avoid buffering frame data needed for reference frame updating in the header composition. + std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64)); + + return vp9_info; +} + +void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { + EntropyProbs entropy{}; + gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); + entropy.Convert(dst); +} + +Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { + Vp9FrameContainer frame{}; + { + gpu.SyncGuestHost(); + frame.info = GetVp9PictureInfo(state); + + frame.bit_stream.resize(frame.info.bitstream_size); + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), + frame.info.bitstream_size); + } + // Buffer two frames, saving the last show frame info + if (!next_next_frame.bit_stream.empty()) { + Vp9FrameContainer temp{ + .info = frame.info, + .bit_stream = frame.bit_stream, + }; + next_next_frame.info.show_frame = frame.info.last_frame_shown; + frame.info = next_next_frame.info; + frame.bit_stream = next_next_frame.bit_stream; + next_next_frame = std::move(temp); + + if (!next_frame.bit_stream.empty()) { + Vp9FrameContainer temp2{ + .info = frame.info, + .bit_stream = frame.bit_stream, + }; + next_frame.info.show_frame = frame.info.last_frame_shown; + frame.info = next_frame.info; + frame.bit_stream = next_frame.bit_stream; + next_frame = std::move(temp2); + } else { + next_frame.info = frame.info; + next_frame.bit_stream = frame.bit_stream; + } + } else { + next_next_frame.info = frame.info; + next_next_frame.bit_stream = frame.bit_stream; + } + return frame; +} + +std::vector<u8> VP9::ComposeCompressedHeader() { + VpxRangeEncoder writer{}; + + if (!current_frame_info.lossless) { + if (static_cast<u32>(current_frame_info.transform_mode) >= 3) { + writer.Write(3, 2); + writer.Write(current_frame_info.transform_mode == 4); + } else { + writer.Write(current_frame_info.transform_mode, 2); + } + } + + if (current_frame_info.transform_mode == 4) { + // tx_mode_probs() in the spec + WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob, + prev_frame_probs.tx_8x8_prob); + WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob, + prev_frame_probs.tx_16x16_prob); + WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob, + prev_frame_probs.tx_32x32_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob; + prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob; + prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob; + } + } + // read_coef_probs() in the spec + WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode, + current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs); + // read_skip_probs() in the spec + WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs, + prev_frame_probs.skip_probs); + + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs; + prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs; + } + + if (!current_frame_info.intra_only) { + // read_inter_probs() in the spec + WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob, + prev_frame_probs.inter_mode_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob; + } + + if (current_frame_info.interp_filter == 4) { + // read_interp_filter_probs() in the spec + WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob, + prev_frame_probs.switchable_interp_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.switchable_interp_prob = + current_frame_info.entropy.switchable_interp_prob; + } + } + + // read_is_inter_probs() in the spec + WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob, + prev_frame_probs.intra_inter_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob; + } + // frame_reference_mode() in the spec + if ((current_frame_info.ref_frame_sign_bias[1] & 1) != + (current_frame_info.ref_frame_sign_bias[2] & 1) || + (current_frame_info.ref_frame_sign_bias[1] & 1) != + (current_frame_info.ref_frame_sign_bias[3] & 1)) { + if (current_frame_info.reference_mode >= 1) { + writer.Write(1, 1); + writer.Write(current_frame_info.reference_mode == 2); + } else { + writer.Write(0, 1); + } + } + + // frame_reference_mode_probs() in the spec + if (current_frame_info.reference_mode == 2) { + WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob, + prev_frame_probs.comp_inter_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob; + } + } + + if (current_frame_info.reference_mode != 1) { + WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob, + prev_frame_probs.single_ref_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob; + } + } + + if (current_frame_info.reference_mode != 0) { + WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob, + prev_frame_probs.comp_ref_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob; + } + } + + // read_y_mode_probs + for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size(); + ++index) { + WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index], + prev_frame_probs.y_mode_prob[index]); + } + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob; + } + // read_partition_probs + WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob, + prev_frame_probs.partition_prob); + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob; + } + + // mv_probs + for (s32 i = 0; i < 3; i++) { + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i], + prev_frame_probs.joints[i]); + } + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.joints = current_frame_info.entropy.joints; + } + + for (s32 i = 0; i < 2; i++) { + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i], + prev_frame_probs.sign[i]); + + for (s32 j = 0; j < 10; j++) { + const int index = i * 10 + j; + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index], + prev_frame_probs.classes[index]); + } + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i], + prev_frame_probs.class_0[i]); + + for (s32 j = 0; j < 10; j++) { + const int index = i * 10 + j; + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index], + prev_frame_probs.prob_bits[index]); + } + } + + for (s32 i = 0; i < 2; i++) { + for (s32 j = 0; j < 2; j++) { + for (s32 k = 0; k < 3; k++) { + const int index = i * 2 * 3 + j * 3 + k; + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index], + prev_frame_probs.class_0_fr[index]); + } + } + + for (s32 j = 0; j < 3; j++) { + const int index = i * 3 + j; + + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index], + prev_frame_probs.fr[index]); + } + } + + if (current_frame_info.allow_high_precision_mv) { + for (s32 index = 0; index < 2; index++) { + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index], + prev_frame_probs.class_0_hp[index]); + WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index], + prev_frame_probs.high_precision[index]); + } + } + + // save previous probs + if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { + prev_frame_probs.sign = current_frame_info.entropy.sign; + prev_frame_probs.classes = current_frame_info.entropy.classes; + prev_frame_probs.class_0 = current_frame_info.entropy.class_0; + prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits; + prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr; + prev_frame_probs.fr = current_frame_info.entropy.fr; + prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp; + prev_frame_probs.high_precision = current_frame_info.entropy.high_precision; + } + } + + writer.End(); + return writer.GetBuffer(); +} + +VpxBitStreamWriter VP9::ComposeUncompressedHeader() { + VpxBitStreamWriter uncomp_writer{}; + + uncomp_writer.WriteU(2, 2); // Frame marker. + uncomp_writer.WriteU(0, 2); // Profile. + uncomp_writer.WriteBit(false); // Show existing frame. + uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame? + uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame? + uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience + + if (current_frame_info.is_key_frame) { + uncomp_writer.WriteU(frame_sync_code, 24); + uncomp_writer.WriteU(0, 3); // Color space. + uncomp_writer.WriteU(0, 1); // Color range. + uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); + uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); + uncomp_writer.WriteBit(false); // Render and frame size different. + + // Reset context + prev_frame_probs = default_probs; + swap_next_golden = false; + loop_filter_ref_deltas.fill(0); + loop_filter_mode_deltas.fill(0); + + // allow frames offsets to stabilize before checking for golden frames + grace_period = 4; + + // On key frames, all frame slots are set to the current frame, + // so the value of the selected slot doesn't really matter. + frame_ctxs.fill({current_frame_number, false, default_probs}); + + // intra only, meaning the frame can be recreated with no other references + current_frame_info.intra_only = true; + + } else { + + if (!current_frame_info.show_frame) { + uncomp_writer.WriteBit(current_frame_info.intra_only); + if (!current_frame_info.last_frame_was_key) { + swap_next_golden = !swap_next_golden; + } + } else { + current_frame_info.intra_only = false; + } + if (!current_frame_info.error_resilient_mode) { + uncomp_writer.WriteU(0, 2); // Reset frame context. + } + + // Last, Golden, Altref frames + std::array<s32, 3> ref_frame_index{0, 1, 2}; + + // Set when next frame is hidden + // altref and golden references are swapped + if (swap_next_golden) { + ref_frame_index = std::array<s32, 3>{0, 2, 1}; + } + + // update Last Frame + u64 refresh_frame_flags = 1; + + // golden frame may refresh, determined if the next golden frame offset is changed + bool golden_refresh = false; + if (grace_period <= 0) { + for (s32 index = 1; index < 3; ++index) { + if (current_frame_info.frame_offsets[index] != + next_frame.info.frame_offsets[index]) { + current_frame_info.refresh_frame[index] = true; + golden_refresh = true; + grace_period = 3; + } + } + } + + if (current_frame_info.show_frame && + (!next_frame.info.show_frame || next_frame.info.is_key_frame)) { + // Update golden frame + refresh_frame_flags = swap_next_golden ? 2 : 4; + } + + if (!current_frame_info.show_frame) { + // Update altref + refresh_frame_flags = swap_next_golden ? 2 : 4; + } else if (golden_refresh) { + refresh_frame_flags = 3; + } + + if (current_frame_info.intra_only) { + uncomp_writer.WriteU(frame_sync_code, 24); + uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8); + uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); + uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); + uncomp_writer.WriteBit(false); // Render and frame size different. + } else { + uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8); + + for (s32 index = 1; index < 4; index++) { + uncomp_writer.WriteU(ref_frame_index[index - 1], 3); + uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1); + } + + uncomp_writer.WriteBit(true); // Frame size with refs. + uncomp_writer.WriteBit(false); // Render and frame size different. + uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv); + uncomp_writer.WriteBit(current_frame_info.interp_filter == 4); + + if (current_frame_info.interp_filter != 4) { + uncomp_writer.WriteU(current_frame_info.interp_filter, 2); + } + } + } + + if (!current_frame_info.error_resilient_mode) { + uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from? + uncomp_writer.WriteBit(true); // Frame parallel decoding mode. + } + + int frame_ctx_idx = 0; + if (!current_frame_info.show_frame) { + frame_ctx_idx = 1; + } + + uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index. + prev_frame_probs = + frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header + frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy}; + + uncomp_writer.WriteU(current_frame_info.first_level, 6); + uncomp_writer.WriteU(current_frame_info.sharpness_level, 3); + uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled); + + if (current_frame_info.mode_ref_delta_enabled) { + // check if ref deltas are different, update accordingly + std::array<bool, 4> update_loop_filter_ref_deltas; + std::array<bool, 2> update_loop_filter_mode_deltas; + + bool loop_filter_delta_update = false; + + for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { + const s8 old_deltas = loop_filter_ref_deltas[index]; + const s8 new_deltas = current_frame_info.ref_deltas[index]; + const bool differing_delta = old_deltas != new_deltas; + + update_loop_filter_ref_deltas[index] = differing_delta; + loop_filter_delta_update |= differing_delta; + } + + for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { + const s8 old_deltas = loop_filter_mode_deltas[index]; + const s8 new_deltas = current_frame_info.mode_deltas[index]; + const bool differing_delta = old_deltas != new_deltas; + + update_loop_filter_mode_deltas[index] = differing_delta; + loop_filter_delta_update |= differing_delta; + } + + uncomp_writer.WriteBit(loop_filter_delta_update); + + if (loop_filter_delta_update) { + for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { + uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]); + + if (update_loop_filter_ref_deltas[index]) { + uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6); + } + } + + for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { + uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]); + + if (update_loop_filter_mode_deltas[index]) { + uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6); + } + } + // save new deltas + loop_filter_ref_deltas = current_frame_info.ref_deltas; + loop_filter_mode_deltas = current_frame_info.mode_deltas; + } + } + + uncomp_writer.WriteU(current_frame_info.base_q_index, 8); + + uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q); + uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); + uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); + + uncomp_writer.WriteBit(false); // Segmentation enabled (TODO). + + const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); + const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width); + + const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2; + const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1; + + // If it's less than the maximum, we need to add an extra 0 on the bitstream + // to indicate that it should stop reading. + if (current_frame_info.log2_tile_cols < max_tile_cols_log2) { + uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1); + } else { + uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff); + } + + const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0; + + uncomp_writer.WriteBit(tile_rows_log2_is_nonzero); + + if (tile_rows_log2_is_nonzero) { + uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1); + } + + return uncomp_writer; +} + +std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { + std::vector<u8> bitstream; + { + Vp9FrameContainer curr_frame = GetCurrentFrame(state); + current_frame_info = curr_frame.info; + bitstream = std::move(curr_frame.bit_stream); + } + + // The uncompressed header routine sets PrevProb parameters needed for the compressed header + auto uncomp_writer = ComposeUncompressedHeader(); + std::vector<u8> compressed_header = ComposeCompressedHeader(); + + uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16); + uncomp_writer.Flush(); + std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray(); + + // Write headers and frame to buffer + frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size()); + std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size()); + std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(), + compressed_header.size()); + std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(), + bitstream.data(), bitstream.size()); + + // keep track of frame number + current_frame_number++; + grace_period--; + + // don't display hidden frames + hidden = !current_frame_info.show_frame; + return frame; +} + +VpxRangeEncoder::VpxRangeEncoder() { + Write(false); +} + +VpxRangeEncoder::~VpxRangeEncoder() = default; + +void VpxRangeEncoder::Write(s32 value, s32 value_size) { + for (s32 bit = value_size - 1; bit >= 0; bit--) { + Write(((value >> bit) & 1) != 0); + } +} + +void VpxRangeEncoder::Write(bool bit) { + Write(bit, half_probability); +} + +void VpxRangeEncoder::Write(bool bit, s32 probability) { + u32 local_range = range; + const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8); + local_range = split; + + if (bit) { + low_value += split; + local_range = range - split; + } + + s32 shift = norm_lut[local_range]; + local_range <<= shift; + count += shift; + + if (count >= 0) { + const s32 offset = shift - count; + + if (((low_value << (offset - 1)) >> 31) != 0) { + const s32 current_pos = static_cast<s32>(base_stream.GetPosition()); + base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos); + while (PeekByte() == 0xff) { + base_stream.WriteByte(0); + + base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos); + } + base_stream.WriteByte(static_cast<u8>((PeekByte() + 1))); + base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin); + } + base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset)))); + + low_value <<= offset; + shift = count; + low_value &= 0xffffff; + count -= 8; + } + + low_value <<= shift; + range = local_range; +} + +void VpxRangeEncoder::End() { + for (std::size_t index = 0; index < 32; ++index) { + Write(false); + } +} + +u8 VpxRangeEncoder::PeekByte() { + const u8 value = base_stream.ReadByte(); + base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos); + + return value; +} + +VpxBitStreamWriter::VpxBitStreamWriter() = default; + +VpxBitStreamWriter::~VpxBitStreamWriter() = default; + +void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) { + WriteBits(value, value_size); +} + +void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) { + const bool sign = value < 0; + if (sign) { + value = -value; + } + + WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1); +} + +void VpxBitStreamWriter::WriteDeltaQ(u32 value) { + const bool delta_coded = value != 0; + WriteBit(delta_coded); + + if (delta_coded) { + WriteBits(value, 4); + } +} + +void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) { + s32 value_pos = 0; + s32 remaining = bit_count; + + while (remaining > 0) { + s32 copy_size = remaining; + + const s32 free = GetFreeBufferBits(); + + if (copy_size > free) { + copy_size = free; + } + + const s32 mask = (1 << copy_size) - 1; + + const s32 src_shift = (bit_count - value_pos) - copy_size; + const s32 dst_shift = (buffer_size - buffer_pos) - copy_size; + + buffer |= ((value >> src_shift) & mask) << dst_shift; + + value_pos += copy_size; + buffer_pos += copy_size; + remaining -= copy_size; + } +} + +void VpxBitStreamWriter::WriteBit(bool state) { + WriteBits(state ? 1 : 0, 1); +} + +s32 VpxBitStreamWriter::GetFreeBufferBits() { + if (buffer_pos == buffer_size) { + Flush(); + } + + return buffer_size - buffer_pos; +} + +void VpxBitStreamWriter::Flush() { + if (buffer_pos == 0) { + return; + } + byte_array.push_back(static_cast<u8>(buffer)); + buffer = 0; + buffer_pos = 0; +} + +std::vector<u8>& VpxBitStreamWriter::GetByteArray() { + return byte_array; +} + +const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const { + return byte_array; +} + +} // namespace Tegra::Decoder diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h new file mode 100644 index 000000000..dc52ddbde --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9.h @@ -0,0 +1,188 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <vector> + +#include "common/common_types.h" +#include "common/stream.h" +#include "video_core/command_classes/codecs/vp9_types.h" +#include "video_core/command_classes/nvdec_common.h" + +namespace Tegra { +class GPU; +enum class FrameType { KeyFrame = 0, InterFrame = 1 }; +namespace Decoder { + +/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the +/// VP9 header bitstreams. + +class VpxRangeEncoder { +public: + VpxRangeEncoder(); + ~VpxRangeEncoder(); + + /// Writes the rightmost value_size bits from value into the stream + void Write(s32 value, s32 value_size); + + /// Writes a single bit with half probability + void Write(bool bit); + + /// Writes a bit to the base_stream encoded with probability + void Write(bool bit, s32 probability); + + /// Signal the end of the bitstream + void End(); + + std::vector<u8>& GetBuffer() { + return base_stream.GetBuffer(); + } + + const std::vector<u8>& GetBuffer() const { + return base_stream.GetBuffer(); + } + +private: + u8 PeekByte(); + Common::Stream base_stream{}; + u32 low_value{}; + u32 range{0xff}; + s32 count{-24}; + s32 half_probability{128}; +}; + +class VpxBitStreamWriter { +public: + VpxBitStreamWriter(); + ~VpxBitStreamWriter(); + + /// Write an unsigned integer value + void WriteU(u32 value, u32 value_size); + + /// Write a signed integer value + void WriteS(s32 value, u32 value_size); + + /// Based on 6.2.10 of VP9 Spec, writes a delta coded value + void WriteDeltaQ(u32 value); + + /// Write a single bit. + void WriteBit(bool state); + + /// Pushes current buffer into buffer_array, resets buffer + void Flush(); + + /// Returns byte_array + std::vector<u8>& GetByteArray(); + + /// Returns const byte_array + const std::vector<u8>& GetByteArray() const; + +private: + /// Write bit_count bits from value into buffer + void WriteBits(u32 value, u32 bit_count); + + /// Gets next available position in buffer, invokes Flush() if buffer is full + s32 GetFreeBufferBits(); + + s32 buffer_size{8}; + + s32 buffer{}; + s32 buffer_pos{}; + std::vector<u8> byte_array; +}; + +class VP9 { +public: + explicit VP9(GPU& gpu); + ~VP9(); + + /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec + /// documentation + std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); + + /// Returns true if the most recent frame was a hidden frame. + bool WasFrameHidden() const { + return hidden; + } + +private: + /// Generates compressed header probability updates in the bitstream writer + template <typename T, std::size_t N> + void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, + const std::array<T, N>& old_prob); + + /// Generates compressed header probability updates in the bitstream writer + /// If probs are not equal, WriteProbabilityDelta is invoked + void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); + + /// Generates compressed header probability deltas in the bitstream writer + void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); + + /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification + s32 RemapProbability(s32 new_prob, s32 old_prob); + + /// Recenters probability. Based on section 6.3.6 of VP9 Specification + s32 RecenterNonNeg(s32 new_prob, s32 old_prob); + + /// Inverse of 6.3.4 Decode term subexp + void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value); + + /// Writes if the value is less than the test value + bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test); + + /// Writes probability updates for the Coef probabilities + void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, + const std::array<u8, 2304>& new_prob, + const std::array<u8, 2304>& old_prob); + + /// Write probabilities for 4-byte aligned structures + template <typename T, std::size_t N> + void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, + const std::array<T, N>& old_prob); + + /// Write motion vector probability updates. 6.3.17 in the spec + void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); + + /// 6.2.14 Tile size calculation + s32 CalcMinLog2TileCols(s32 frame_width); + s32 CalcMaxLog2TileCols(s32 frame_width); + + /// Returns VP9 information from NVDEC provided offset and size + Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); + + /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct + void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); + + /// Returns frame to be decoded after buffering + Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); + + /// Use NVDEC providied information to compose the headers for the current frame + std::vector<u8> ComposeCompressedHeader(); + VpxBitStreamWriter ComposeUncompressedHeader(); + + GPU& gpu; + std::vector<u8> frame; + + std::array<s8, 4> loop_filter_ref_deltas{}; + std::array<s8, 2> loop_filter_mode_deltas{}; + + bool hidden; + s64 current_frame_number = -2; // since we buffer 2 frames + s32 grace_period = 6; // frame offsets need to stabilize + std::array<FrameContexts, 4> frame_ctxs{}; + Vp9FrameContainer next_frame{}; + Vp9FrameContainer next_next_frame{}; + bool swap_next_golden{}; + + Vp9PictureInfo current_frame_info{}; + Vp9EntropyProbs prev_frame_probs{}; + + s32 diff_update_probability = 252; + s32 frame_sync_code = 0x498342; +}; + +} // namespace Decoder +} // namespace Tegra diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h new file mode 100644 index 000000000..a50acf6e8 --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -0,0 +1,367 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <cstring> +#include <vector> +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra { +class GPU; + +namespace Decoder { +struct Vp9FrameDimensions { + s16 width{}; + s16 height{}; + s16 luma_pitch{}; + s16 chroma_pitch{}; +}; +static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); + +enum FrameFlags : u32 { + IsKeyFrame = 1 << 0, + LastFrameIsKeyFrame = 1 << 1, + FrameSizeChanged = 1 << 2, + ErrorResilientMode = 1 << 3, + LastShowFrame = 1 << 4, + IntraOnly = 1 << 5, +}; + +enum class MvJointType { + MvJointZero = 0, /* Zero vector */ + MvJointHnzvz = 1, /* Vert zero, hor nonzero */ + MvJointHzvnz = 2, /* Hor zero, vert nonzero */ + MvJointHnzvnz = 3, /* Both components nonzero */ +}; +enum class MvClassType { + MvClass0 = 0, /* (0, 2] integer pel */ + MvClass1 = 1, /* (2, 4] integer pel */ + MvClass2 = 2, /* (4, 8] integer pel */ + MvClass3 = 3, /* (8, 16] integer pel */ + MvClass4 = 4, /* (16, 32] integer pel */ + MvClass5 = 5, /* (32, 64] integer pel */ + MvClass6 = 6, /* (64, 128] integer pel */ + MvClass7 = 7, /* (128, 256] integer pel */ + MvClass8 = 8, /* (256, 512] integer pel */ + MvClass9 = 9, /* (512, 1024] integer pel */ + MvClass10 = 10, /* (1024,2048] integer pel */ +}; + +enum class BlockSize { + Block4x4 = 0, + Block4x8 = 1, + Block8x4 = 2, + Block8x8 = 3, + Block8x16 = 4, + Block16x8 = 5, + Block16x16 = 6, + Block16x32 = 7, + Block32x16 = 8, + Block32x32 = 9, + Block32x64 = 10, + Block64x32 = 11, + Block64x64 = 12, + BlockSizes = 13, + BlockInvalid = BlockSizes +}; + +enum class PredictionMode { + DcPred = 0, // Average of above and left pixels + VPred = 1, // Vertical + HPred = 2, // Horizontal + D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi) + D135Pred = 4, // Directional 135 deg = 180 - 45 + D117Pred = 5, // Directional 117 deg = 180 - 63 + D153Pred = 6, // Directional 153 deg = 180 - 27 + D207Pred = 7, // Directional 207 deg = 180 + 27 + D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi) + TmPred = 9, // True-motion + NearestMv = 10, + NearMv = 11, + ZeroMv = 12, + NewMv = 13, + MbModeCount = 14 +}; + +enum class TxSize { + Tx4x4 = 0, // 4x4 transform + Tx8x8 = 1, // 8x8 transform + Tx16x16 = 2, // 16x16 transform + Tx32x32 = 3, // 32x32 transform + TxSizes = 4 +}; + +enum class TxMode { + Only4X4 = 0, // Only 4x4 transform used + Allow8X8 = 1, // Allow block transform size up to 8x8 + Allow16X16 = 2, // Allow block transform size up to 16x16 + Allow32X32 = 3, // Allow block transform size up to 32x32 + TxModeSelect = 4, // Transform specified for each block + TxModes = 5 +}; + +enum class reference_mode { + SingleReference = 0, + CompoundReference = 1, + ReferenceModeSelect = 2, + ReferenceModes = 3 +}; + +struct Segmentation { + u8 enabled{}; + u8 update_map{}; + u8 temporal_update{}; + u8 abs_delta{}; + std::array<u32, 8> feature_mask{}; + std::array<std::array<s16, 4>, 8> feature_data{}; +}; +static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); + +struct LoopFilter { + u8 mode_ref_delta_enabled{}; + std::array<s8, 4> ref_deltas{}; + std::array<s8, 2> mode_deltas{}; +}; +static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); + +struct Vp9EntropyProbs { + std::array<u8, 36> y_mode_prob{}; + std::array<u8, 64> partition_prob{}; + std::array<u8, 2304> coef_probs{}; + std::array<u8, 8> switchable_interp_prob{}; + std::array<u8, 28> inter_mode_prob{}; + std::array<u8, 4> intra_inter_prob{}; + std::array<u8, 5> comp_inter_prob{}; + std::array<u8, 10> single_ref_prob{}; + std::array<u8, 5> comp_ref_prob{}; + std::array<u8, 6> tx_32x32_prob{}; + std::array<u8, 4> tx_16x16_prob{}; + std::array<u8, 2> tx_8x8_prob{}; + std::array<u8, 3> skip_probs{}; + std::array<u8, 3> joints{}; + std::array<u8, 2> sign{}; + std::array<u8, 20> classes{}; + std::array<u8, 2> class_0{}; + std::array<u8, 20> prob_bits{}; + std::array<u8, 12> class_0_fr{}; + std::array<u8, 6> fr{}; + std::array<u8, 2> class_0_hp{}; + std::array<u8, 2> high_precision{}; +}; +static_assert(sizeof(Vp9EntropyProbs) == 0x9F4, "Vp9EntropyProbs is an invalid size"); + +struct Vp9PictureInfo { + bool is_key_frame{}; + bool intra_only{}; + bool last_frame_was_key{}; + bool frame_size_changed{}; + bool error_resilient_mode{}; + bool last_frame_shown{}; + bool show_frame{}; + std::array<s8, 4> ref_frame_sign_bias{}; + s32 base_q_index{}; + s32 y_dc_delta_q{}; + s32 uv_dc_delta_q{}; + s32 uv_ac_delta_q{}; + bool lossless{}; + s32 transform_mode{}; + bool allow_high_precision_mv{}; + s32 interp_filter{}; + s32 reference_mode{}; + s8 comp_fixed_ref{}; + std::array<s8, 2> comp_var_ref{}; + s32 log2_tile_cols{}; + s32 log2_tile_rows{}; + bool segment_enabled{}; + bool segment_map_update{}; + bool segment_map_temporal_update{}; + s32 segment_abs_delta{}; + std::array<u32, 8> segment_feature_enable{}; + std::array<std::array<s16, 4>, 8> segment_feature_data{}; + bool mode_ref_delta_enabled{}; + bool use_prev_in_find_mv_refs{}; + std::array<s8, 4> ref_deltas{}; + std::array<s8, 2> mode_deltas{}; + Vp9EntropyProbs entropy{}; + Vp9FrameDimensions frame_size{}; + u8 first_level{}; + u8 sharpness_level{}; + u32 bitstream_size{}; + std::array<u64, 4> frame_offsets{}; + std::array<bool, 4> refresh_frame{}; +}; + +struct Vp9FrameContainer { + Vp9PictureInfo info{}; + std::vector<u8> bit_stream; +}; + +struct PictureInfo { + INSERT_PADDING_WORDS(12); + u32 bitstream_size{}; + INSERT_PADDING_WORDS(5); + Vp9FrameDimensions last_frame_size{}; + Vp9FrameDimensions golden_frame_size{}; + Vp9FrameDimensions alt_frame_size{}; + Vp9FrameDimensions current_frame_size{}; + u32 vp9_flags{}; + std::array<s8, 4> ref_frame_sign_bias{}; + u8 first_level{}; + u8 sharpness_level{}; + u8 base_q_index{}; + u8 y_dc_delta_q{}; + u8 uv_ac_delta_q{}; + u8 uv_dc_delta_q{}; + u8 lossless{}; + u8 tx_mode{}; + u8 allow_high_precision_mv{}; + u8 interp_filter{}; + u8 reference_mode{}; + s8 comp_fixed_ref{}; + std::array<s8, 2> comp_var_ref{}; + u8 log2_tile_cols{}; + u8 log2_tile_rows{}; + Segmentation segmentation{}; + LoopFilter loop_filter{}; + INSERT_PADDING_BYTES(5); + u32 surface_params{}; + INSERT_PADDING_WORDS(3); + + Vp9PictureInfo Convert() const { + + return Vp9PictureInfo{ + .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, + .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, + .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, + .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, + .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, + .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, + .ref_frame_sign_bias = ref_frame_sign_bias, + .base_q_index = base_q_index, + .y_dc_delta_q = y_dc_delta_q, + .uv_dc_delta_q = uv_dc_delta_q, + .uv_ac_delta_q = uv_ac_delta_q, + .lossless = lossless != 0, + .transform_mode = tx_mode, + .allow_high_precision_mv = allow_high_precision_mv != 0, + .interp_filter = interp_filter, + .reference_mode = reference_mode, + .comp_fixed_ref = comp_fixed_ref, + .comp_var_ref = comp_var_ref, + .log2_tile_cols = log2_tile_cols, + .log2_tile_rows = log2_tile_rows, + .segment_enabled = segmentation.enabled != 0, + .segment_map_update = segmentation.update_map != 0, + .segment_map_temporal_update = segmentation.temporal_update != 0, + .segment_abs_delta = segmentation.abs_delta, + .segment_feature_enable = segmentation.feature_mask, + .segment_feature_data = segmentation.feature_data, + .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, + .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) && + !(vp9_flags == (FrameFlags::FrameSizeChanged)) && + !(vp9_flags == (FrameFlags::IntraOnly)) && + (vp9_flags == (FrameFlags::LastShowFrame)) && + !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), + .ref_deltas = loop_filter.ref_deltas, + .mode_deltas = loop_filter.mode_deltas, + .frame_size = current_frame_size, + .first_level = first_level, + .sharpness_level = sharpness_level, + .bitstream_size = bitstream_size, + }; + } +}; +static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); + +struct EntropyProbs { + INSERT_PADDING_BYTES(1024); + std::array<std::array<u8, 4>, 7> inter_mode_prob{}; + std::array<u8, 4> intra_inter_prob{}; + INSERT_PADDING_BYTES(80); + std::array<std::array<u8, 1>, 2> tx_8x8_prob{}; + std::array<std::array<u8, 2>, 2> tx_16x16_prob{}; + std::array<std::array<u8, 3>, 2> tx_32x32_prob{}; + std::array<u8, 4> y_mode_prob_e8{}; + std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; + INSERT_PADDING_BYTES(64); + std::array<std::array<u8, 4>, 16> partition_prob{}; + INSERT_PADDING_BYTES(10); + std::array<std::array<u8, 2>, 4> switchable_interp_prob{}; + std::array<u8, 5> comp_inter_prob{}; + std::array<u8, 4> skip_probs{}; + std::array<u8, 3> joints{}; + std::array<u8, 2> sign{}; + std::array<std::array<u8, 1>, 2> class_0{}; + std::array<std::array<u8, 3>, 2> fr{}; + std::array<u8, 2> class_0_hp{}; + std::array<u8, 2> high_precision{}; + std::array<std::array<u8, 10>, 2> classes{}; + std::array<std::array<std::array<u8, 3>, 2>, 2> class_0_fr{}; + std::array<std::array<u8, 10>, 2> pred_bits{}; + std::array<std::array<u8, 2>, 5> single_ref_prob{}; + std::array<u8, 5> comp_ref_prob{}; + INSERT_PADDING_BYTES(17); + std::array<std::array<std::array<std::array<std::array<std::array<u8, 4>, 6>, 6>, 2>, 2>, 4> + coef_probs{}; + + void Convert(Vp9EntropyProbs& fc) { + std::memcpy(fc.inter_mode_prob.data(), inter_mode_prob.data(), fc.inter_mode_prob.size()); + + std::memcpy(fc.intra_inter_prob.data(), intra_inter_prob.data(), + fc.intra_inter_prob.size()); + + std::memcpy(fc.tx_8x8_prob.data(), tx_8x8_prob.data(), fc.tx_8x8_prob.size()); + std::memcpy(fc.tx_16x16_prob.data(), tx_16x16_prob.data(), fc.tx_16x16_prob.size()); + std::memcpy(fc.tx_32x32_prob.data(), tx_32x32_prob.data(), fc.tx_32x32_prob.size()); + + for (s32 i = 0; i < 4; i++) { + for (s32 j = 0; j < 9; j++) { + fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i]; + } + } + + std::memcpy(fc.partition_prob.data(), partition_prob.data(), fc.partition_prob.size()); + + std::memcpy(fc.switchable_interp_prob.data(), switchable_interp_prob.data(), + fc.switchable_interp_prob.size()); + std::memcpy(fc.comp_inter_prob.data(), comp_inter_prob.data(), fc.comp_inter_prob.size()); + std::memcpy(fc.skip_probs.data(), skip_probs.data(), fc.skip_probs.size()); + + std::memcpy(fc.joints.data(), joints.data(), fc.joints.size()); + + std::memcpy(fc.sign.data(), sign.data(), fc.sign.size()); + std::memcpy(fc.class_0.data(), class_0.data(), fc.class_0.size()); + std::memcpy(fc.fr.data(), fr.data(), fc.fr.size()); + std::memcpy(fc.class_0_hp.data(), class_0_hp.data(), fc.class_0_hp.size()); + std::memcpy(fc.high_precision.data(), high_precision.data(), fc.high_precision.size()); + std::memcpy(fc.classes.data(), classes.data(), fc.classes.size()); + std::memcpy(fc.class_0_fr.data(), class_0_fr.data(), fc.class_0_fr.size()); + std::memcpy(fc.prob_bits.data(), pred_bits.data(), fc.prob_bits.size()); + std::memcpy(fc.single_ref_prob.data(), single_ref_prob.data(), fc.single_ref_prob.size()); + std::memcpy(fc.comp_ref_prob.data(), comp_ref_prob.data(), fc.comp_ref_prob.size()); + + std::memcpy(fc.coef_probs.data(), coef_probs.data(), fc.coef_probs.size()); + } +}; +static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size"); + +enum class Ref { Last, Golden, AltRef }; + +struct RefPoolElement { + s64 frame{}; + Ref ref{}; + bool refresh{}; +}; + +struct FrameContexts { + s64 from{}; + bool adapted{}; + Vp9EntropyProbs probs{}; +}; + +}; // namespace Decoder +}; // namespace Tegra diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp new file mode 100644 index 000000000..c4dd4881a --- /dev/null +++ b/src/video_core/command_classes/host1x.cpp @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/command_classes/host1x.h" +#include "video_core/gpu.h" + +Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {} + +Tegra::Host1x::~Host1x() = default; + +void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { + u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32); + std::memcpy(state_offset, &arguments, sizeof(u32)); +} + +void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) { + StateWrite(static_cast<u32>(method), arguments[0]); + switch (method) { + case Method::WaitSyncpt: + Execute(arguments[0]); + break; + case Method::LoadSyncptPayload32: + syncpoint_value = arguments[0]; + break; + case Method::WaitSyncpt32: + Execute(arguments[0]); + break; + default: + UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method)); + break; + } +} + +void Tegra::Host1x::Execute(u32 data) { + // This method waits on a valid syncpoint. + // TODO: Implement when proper Async is in place +} diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h new file mode 100644 index 000000000..013eaa0c1 --- /dev/null +++ b/src/video_core/command_classes/host1x.h @@ -0,0 +1,78 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra { +class GPU; +class Nvdec; + +class Host1x { +public: + struct Host1xClassRegisters { + u32 incr_syncpt{}; + u32 incr_syncpt_ctrl{}; + u32 incr_syncpt_error{}; + INSERT_PADDING_WORDS(5); + u32 wait_syncpt{}; + u32 wait_syncpt_base{}; + u32 wait_syncpt_incr{}; + u32 load_syncpt_base{}; + u32 incr_syncpt_base{}; + u32 clear{}; + u32 wait{}; + u32 wait_with_interrupt{}; + u32 delay_use{}; + u32 tick_count_high{}; + u32 tick_count_low{}; + u32 tick_ctrl{}; + INSERT_PADDING_WORDS(23); + u32 ind_ctrl{}; + u32 ind_off2{}; + u32 ind_off{}; + std::array<u32, 31> ind_data{}; + INSERT_PADDING_WORDS(1); + u32 load_syncpoint_payload32{}; + u32 stall_ctrl{}; + u32 wait_syncpt32{}; + u32 wait_syncpt_base32{}; + u32 load_syncpt_base32{}; + u32 incr_syncpt_base32{}; + u32 stall_count_high{}; + u32 stall_count_low{}; + u32 xref_ctrl{}; + u32 channel_xref_high{}; + u32 channel_xref_low{}; + }; + static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size"); + + enum class Method : u32 { + WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, + LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, + WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4, + }; + + explicit Host1x(GPU& gpu); + ~Host1x(); + + /// Writes the method into the state, Invoke Execute() if encountered + void ProcessMethod(Method method, const std::vector<u32>& arguments); + +private: + /// For Host1x, execute is waiting on a syncpoint previously written into the state + void Execute(u32 data); + + /// Write argument into the provided offset + void StateWrite(u32 offset, u32 arguments); + + u32 syncpoint_value{}; + Host1xClassRegisters state{}; + GPU& gpu; +}; + +} // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp new file mode 100644 index 000000000..8ca7a7b06 --- /dev/null +++ b/src/video_core/command_classes/nvdec.cpp @@ -0,0 +1,52 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/command_classes/nvdec.h" +#include "video_core/gpu.h" + +namespace Tegra { + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} + +Nvdec::~Nvdec() = default; + +void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) { + if (method == Method::SetVideoCodec) { + codec->StateWrite(static_cast<u32>(method), arguments[0]); + } else { + codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8); + } + + switch (method) { + case Method::SetVideoCodec: + codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0])); + break; + case Method::Execute: + Execute(); + break; + } +} + +AVFrame* Nvdec::GetFrame() { + return codec->GetCurrentFrame(); +} + +const AVFrame* Nvdec::GetFrame() const { + return codec->GetCurrentFrame(); +} + +void Nvdec::Execute() { + switch (codec->GetCurrentCodec()) { + case NvdecCommon::VideoCodec::H264: + case NvdecCommon::VideoCodec::Vp9: + codec->Decode(); + break; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec())); + break; + } +} + +} // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h new file mode 100644 index 000000000..af14f9857 --- /dev/null +++ b/src/video_core/command_classes/nvdec.h @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <vector> +#include "common/common_types.h" +#include "video_core/command_classes/codecs/codec.h" + +namespace Tegra { +class GPU; + +class Nvdec { +public: + enum class Method : u32 { + SetVideoCodec = 0x80, + Execute = 0xc0, + }; + + explicit Nvdec(GPU& gpu); + ~Nvdec(); + + /// Writes the method into the state, Invoke Execute() if encountered + void ProcessMethod(Method method, const std::vector<u32>& arguments); + + /// Return most recently decoded frame + AVFrame* GetFrame(); + const AVFrame* GetFrame() const; + +private: + /// Invoke codec to decode a frame + void Execute(); + + GPU& gpu; + std::unique_ptr<Codec> codec; +}; +} // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h new file mode 100644 index 000000000..01b5e086d --- /dev/null +++ b/src/video_core/command_classes/nvdec_common.h @@ -0,0 +1,48 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra::NvdecCommon { + +struct NvdecRegisters { + INSERT_PADDING_WORDS(256); + u64 set_codec_id{}; + INSERT_PADDING_WORDS(254); + u64 set_platform_id{}; + u64 picture_info_offset{}; + u64 frame_bitstream_offset{}; + u64 frame_number{}; + u64 h264_slice_data_offsets{}; + u64 h264_mv_dump_offset{}; + INSERT_PADDING_WORDS(6); + u64 frame_stats_offset{}; + u64 h264_last_surface_luma_offset{}; + u64 h264_last_surface_chroma_offset{}; + std::array<u64, 17> surface_luma_offset{}; + std::array<u64, 17> surface_chroma_offset{}; + INSERT_PADDING_WORDS(132); + u64 vp9_entropy_probs_offset{}; + u64 vp9_backward_updates_offset{}; + u64 vp9_last_frame_segmap_offset{}; + u64 vp9_curr_frame_segmap_offset{}; + INSERT_PADDING_WORDS(2); + u64 vp9_last_frame_mvs_offset{}; + u64 vp9_curr_frame_mvs_offset{}; + INSERT_PADDING_WORDS(2); +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +enum class VideoCodec : u32 { + None = 0x0, + H264 = 0x3, + Vp8 = 0x5, + H265 = 0x7, + Vp9 = 0x9, +}; + +} // namespace Tegra::NvdecCommon diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp new file mode 100644 index 000000000..19dc9e0ab --- /dev/null +++ b/src/video_core/command_classes/sync_manager.cpp @@ -0,0 +1,60 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include <algorithm> +#include "sync_manager.h" +#include "video_core/gpu.h" + +namespace Tegra { +SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} +SyncptIncrManager::~SyncptIncrManager() = default; + +void SyncptIncrManager::Increment(u32 id) { + increments.emplace_back(0, 0, id, true); + IncrementAllDone(); +} + +u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { + const u32 handle = current_id++; + increments.emplace_back(handle, class_id, id); + return handle; +} + +void SyncptIncrManager::SignalDone(u32 handle) { + const auto done_incr = + std::find_if(increments.begin(), increments.end(), + [handle](const SyncptIncr& incr) { return incr.id == handle; }); + if (done_incr != increments.cend()) { + done_incr->complete = true; + } + IncrementAllDone(); +} + +void SyncptIncrManager::IncrementAllDone() { + std::size_t done_count = 0; + for (; done_count < increments.size(); ++done_count) { + if (!increments[done_count].complete) { + break; + } + gpu.IncrementSyncPoint(increments[done_count].syncpt_id); + } + increments.erase(increments.begin(), increments.begin() + done_count); +} +} // namespace Tegra diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h new file mode 100644 index 000000000..2c321ec58 --- /dev/null +++ b/src/video_core/command_classes/sync_manager.h @@ -0,0 +1,64 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#pragma once + +#include <mutex> +#include <vector> +#include "common/common_types.h" + +namespace Tegra { +class GPU; +struct SyncptIncr { + u32 id; + u32 class_id; + u32 syncpt_id; + bool complete; + + SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false) + : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {} +}; + +class SyncptIncrManager { +public: + explicit SyncptIncrManager(GPU& gpu); + ~SyncptIncrManager(); + + /// Add syncpoint id and increment all + void Increment(u32 id); + + /// Returns a handle to increment later + u32 IncrementWhenDone(u32 class_id, u32 id); + + /// IncrememntAllDone, including handle + void SignalDone(u32 handle); + + /// Increment all sequential pending increments that are already done. + void IncrementAllDone(); + +private: + std::vector<SyncptIncr> increments; + std::mutex increment_lock; + u32 current_id{}; + + GPU& gpu; +}; + +} // namespace Tegra diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp new file mode 100644 index 000000000..5b52da277 --- /dev/null +++ b/src/video_core/command_classes/vic.cpp @@ -0,0 +1,180 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include "common/assert.h" +#include "video_core/command_classes/nvdec.h" +#include "video_core/command_classes/vic.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/texture_cache/surface_params.h" + +extern "C" { +#include <libswscale/swscale.h> +} + +namespace Tegra { + +Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) + : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {} +Vic::~Vic() = default; + +void Vic::VicStateWrite(u32 offset, u32 arguments) { + u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32); + std::memcpy(state_offset, &arguments, sizeof(u32)); +} + +void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) { + LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method)); + VicStateWrite(static_cast<u32>(method), arguments[0]); + const u64 arg = static_cast<u64>(arguments[0]) << 8; + switch (method) { + case Method::Execute: + Execute(); + break; + case Method::SetConfigStructOffset: + config_struct_address = arg; + break; + case Method::SetOutputSurfaceLumaOffset: + output_surface_luma_address = arg; + break; + case Method::SetOutputSurfaceChromaUOffset: + output_surface_chroma_u_address = arg; + break; + case Method::SetOutputSurfaceChromaVOffset: + output_surface_chroma_v_address = arg; + break; + default: + break; + } +} + +void Vic::Execute() { + if (output_surface_luma_address == 0) { + LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", + vic_state.output_surface.luma_offset); + return; + } + const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)}; + const VideoPixelFormat pixel_format = + static_cast<VideoPixelFormat>(config.pixel_format.Value()); + switch (pixel_format) { + case VideoPixelFormat::BGRA8: + case VideoPixelFormat::RGBA8: { + LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); + const auto* frame = nvdec_processor->GetFrame(); + + if (!frame || frame->width == 0 || frame->height == 0) { + return; + } + if (scaler_ctx == nullptr || frame->width != scaler_width || + frame->height != scaler_height) { + const AVPixelFormat target_format = + (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA; + + sws_freeContext(scaler_ctx); + scaler_ctx = nullptr; + + // FFmpeg returns all frames in YUV420, convert it into expected format + scaler_ctx = + sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width, + frame->height, target_format, 0, nullptr, nullptr, nullptr); + + scaler_width = frame->width; + scaler_height = frame->height; + } + // Get Converted frame + const std::size_t linear_size = frame->width * frame->height * 4; + + using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; + AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free}; + + const int converted_stride{frame->width * 4}; + u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; + + sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, + &converted_frame_buf_addr, &converted_stride); + + const u32 blk_kind = static_cast<u32>(config.block_linear_kind); + if (blk_kind != 0) { + // swizzle pitch linear to block linear + const u32 block_height = static_cast<u32>(config.block_linear_height_log2); + const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, + block_height, 0); + std::vector<u8> swizzled_data(size); + Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, + swizzled_data.data(), converted_frame_buffer.get(), + false, block_height, 0, 1); + + gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); + gpu.Maxwell3D().OnMemoryWrite(); + } else { + // send pitch linear frame + gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, + linear_size); + gpu.Maxwell3D().OnMemoryWrite(); + } + break; + } + case VideoPixelFormat::Yuv420: { + LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); + + const auto* frame = nvdec_processor->GetFrame(); + + if (!frame || frame->width == 0 || frame->height == 0) { + return; + } + + const std::size_t surface_width = config.surface_width_minus1 + 1; + const std::size_t surface_height = config.surface_height_minus1 + 1; + const std::size_t half_width = surface_width / 2; + const std::size_t half_height = config.surface_height_minus1 / 2; + const std::size_t aligned_width = (surface_width + 0xff) & ~0xff; + + const auto* luma_ptr = frame->data[0]; + const auto* chroma_b_ptr = frame->data[1]; + const auto* chroma_r_ptr = frame->data[2]; + const auto stride = frame->linesize[0]; + const auto half_stride = frame->linesize[1]; + + std::vector<u8> luma_buffer(aligned_width * surface_height); + std::vector<u8> chroma_buffer(aligned_width * half_height); + + // Populate luma buffer + for (std::size_t y = 0; y < surface_height - 1; ++y) { + std::size_t src = y * stride; + std::size_t dst = y * aligned_width; + + std::size_t size = surface_width; + + for (std::size_t offset = 0; offset < size; ++offset) { + luma_buffer[dst + offset] = luma_ptr[src + offset]; + } + } + gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), + luma_buffer.size()); + + // Populate chroma buffer from both channels with interleaving. + for (std::size_t y = 0; y < half_height; ++y) { + std::size_t src = y * half_stride; + std::size_t dst = y * aligned_width; + + for (std::size_t x = 0; x < half_width; ++x) { + chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x]; + chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x]; + } + } + gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), + chroma_buffer.size()); + gpu.Maxwell3D().OnMemoryWrite(); + break; + } + default: + UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value()); + break; + } +} + +} // namespace Tegra diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h new file mode 100644 index 000000000..8c4e284a1 --- /dev/null +++ b/src/video_core/command_classes/vic.h @@ -0,0 +1,110 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <vector> +#include "common/bit_field.h" +#include "common/common_types.h" + +struct SwsContext; + +namespace Tegra { +class GPU; +class Nvdec; + +struct PlaneOffsets { + u32 luma_offset{}; + u32 chroma_u_offset{}; + u32 chroma_v_offset{}; +}; + +struct VicRegisters { + INSERT_PADDING_WORDS(64); + u32 nop{}; + INSERT_PADDING_WORDS(15); + u32 pm_trigger{}; + INSERT_PADDING_WORDS(47); + u32 set_application_id{}; + u32 set_watchdog_timer{}; + INSERT_PADDING_WORDS(17); + u32 context_save_area{}; + u32 context_switch{}; + INSERT_PADDING_WORDS(43); + u32 execute{}; + INSERT_PADDING_WORDS(63); + std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{}; + u32 picture_index{}; + u32 control_params{}; + u32 config_struct_offset{}; + u32 filter_struct_offset{}; + u32 palette_offset{}; + u32 hist_offset{}; + u32 context_id{}; + u32 fce_ucode_size{}; + PlaneOffsets output_surface{}; + u32 fce_ucode_offset{}; + INSERT_PADDING_WORDS(4); + std::array<u32, 8> slot_context_id{}; + INSERT_PADDING_WORDS(16); +}; +static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size"); + +class Vic { +public: + enum class Method : u32 { + Execute = 0xc0, + SetControlParams = 0x1c1, + SetConfigStructOffset = 0x1c2, + SetOutputSurfaceLumaOffset = 0x1c8, + SetOutputSurfaceChromaUOffset = 0x1c9, + SetOutputSurfaceChromaVOffset = 0x1ca + }; + + explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor); + ~Vic(); + + /// Write to the device state. + void ProcessMethod(Method method, const std::vector<u32>& arguments); + +private: + void Execute(); + + void VicStateWrite(u32 offset, u32 arguments); + VicRegisters vic_state{}; + + enum class VideoPixelFormat : u64_le { + RGBA8 = 0x1f, + BGRA8 = 0x20, + Yuv420 = 0x44, + }; + + union VicConfig { + u64_le raw{}; + BitField<0, 7, u64_le> pixel_format; + BitField<7, 2, u64_le> chroma_loc_horiz; + BitField<9, 2, u64_le> chroma_loc_vert; + BitField<11, 4, u64_le> block_linear_kind; + BitField<15, 4, u64_le> block_linear_height_log2; + BitField<19, 3, u64_le> reserved0; + BitField<22, 10, u64_le> reserved1; + BitField<32, 14, u64_le> surface_width_minus1; + BitField<46, 14, u64_le> surface_height_minus1; + }; + + GPU& gpu; + std::shared_ptr<Tegra::Nvdec> nvdec_processor; + + GPUVAddr config_struct_address{}; + GPUVAddr output_surface_luma_address{}; + GPUVAddr output_surface_chroma_u_address{}; + GPUVAddr output_surface_chroma_v_address{}; + + SwsContext* scaler_ctx{}; + s32 scaler_width{}; + s32 scaler_height{}; +}; + +} // namespace Tegra diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4bb9256e9..171f78183 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -27,9 +27,10 @@ namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system_, bool is_async_) +GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, + cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_}, maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, fermi_2d{std::make_unique<Engines::Fermi2D>()}, kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, @@ -77,10 +78,18 @@ DmaPusher& GPU::DmaPusher() { return *dma_pusher; } +Tegra::CDmaPusher& GPU::CDmaPusher() { + return *cdma_pusher; +} + const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +const Tegra::CDmaPusher& GPU::CDmaPusher() const { + return *cdma_pusher; +} + void GPU::WaitFence(u32 syncpoint_id, u32 value) { // Synced GPU, is always in sync if (!is_async) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2d15d1c6f..b8c613b11 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "core/hle/service/nvdrv/nvdata.h" #include "core/hle/service/nvflinger/buffer_queue.h" +#include "video_core/cdma_pusher.h" #include "video_core/dma_pusher.h" using CacheAddr = std::uintptr_t; @@ -157,7 +158,7 @@ public: method_count(method_count) {} }; - explicit GPU(Core::System& system, bool is_async); + explicit GPU(Core::System& system, bool is_async, bool use_nvdec); virtual ~GPU(); /// Binds a renderer to the GPU. @@ -209,6 +210,15 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + /// Returns a const reference to the GPU DMA pusher. + const Tegra::DmaPusher& DmaPusher() const; + + /// Returns a reference to the GPU CDMA pusher. + Tegra::CDmaPusher& CDmaPusher(); + + /// Returns a const reference to the GPU CDMA pusher. + const Tegra::CDmaPusher& CDmaPusher() const; + VideoCore::RendererBase& Renderer() { return *renderer; } @@ -249,8 +259,9 @@ public: return is_async; } - /// Returns a const reference to the GPU DMA pusher. - const Tegra::DmaPusher& DmaPusher() const; + bool UseNvdec() const { + return use_nvdec; + } struct Regs { static constexpr size_t NUM_REGS = 0x40; @@ -311,6 +322,9 @@ public: /// Push GPU command entries to be processed virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; + /// Push GPU command buffer entries to be processed + virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0; + /// Swap buffers (render frame) virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; @@ -349,7 +363,9 @@ protected: Core::System& system; std::unique_ptr<Tegra::MemoryManager> memory_manager; std::unique_ptr<Tegra::DmaPusher> dma_pusher; + std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; std::unique_ptr<VideoCore::RendererBase> renderer; + const bool use_nvdec; private: /// Mapping of command subchannels to their bound engine ids @@ -372,6 +388,7 @@ private: std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; std::mutex sync_mutex; + std::mutex device_mutex; std::condition_variable sync_cv; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 70a3d5738..a9baaf7ef 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,12 +10,13 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {} +GPUAsynch::GPUAsynch(Core::System& system, bool use_nvdec) + : GPU{system, true, use_nvdec}, gpu_thread{system} {} GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); + gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); cpu_context = renderer->GetRenderWindow().CreateSharedContext(); cpu_context->MakeCurrent(); } @@ -32,6 +33,27 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { gpu_thread.SubmitList(std::move(entries)); } +void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + if (!use_nvdec) { + return; + } + // This condition fires when a video stream ends, clear all intermediary data + if (entries[0].raw == 0xDEADB33F) { + cdma_pusher.reset(); + return; + } + if (!cdma_pusher) { + cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); + } + + // SubmitCommandBuffer would make the nvdec operations async, this is not currently working + // TODO(ameerj): RE proper async nvdec operation + // gpu_thread.SubmitCommandBuffer(std::move(entries)); + + cdma_pusher->Push(std::move(entries)); + cdma_pusher->DispatchCalls(); +} + void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { gpu_thread.SwapBuffers(framebuffer); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index f89c855a5..0c0872e73 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -20,13 +20,14 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system); + explicit GPUAsynch(Core::System& system, bool use_nvdec); ~GPUAsynch() override; void Start() override; void ObtainContext() override; void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; + void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 1ca47ddef..ecf7bbdf3 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,7 +7,7 @@ namespace VideoCommon { -GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {} +GPUSynch::GPUSynch(Core::System& system, bool use_nvdec) : GPU{system, false, use_nvdec} {} GPUSynch::~GPUSynch() = default; @@ -26,6 +26,22 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->DispatchCalls(); } +void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + if (!use_nvdec) { + return; + } + // This condition fires when a video stream ends, clears all intermediary data + if (entries[0].raw == 0xDEADB33F) { + cdma_pusher.reset(); + return; + } + if (!cdma_pusher) { + cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); + } + cdma_pusher->Push(std::move(entries)); + cdma_pusher->DispatchCalls(); +} + void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { renderer->SwapBuffers(framebuffer); } diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 297258cb1..9d778c71a 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -19,13 +19,14 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system); + explicit GPUSynch(Core::System& system, bool use_nvdec); ~GPUSynch() override; void Start() override; void ObtainContext() override; void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; + void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index bf761abf2..4b8f58283 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -18,7 +18,7 @@ namespace VideoCommon::GPUThread { /// Runs the GPU thread static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, - SynchState& state) { + SynchState& state, Tegra::CDmaPusher& cdma_pusher) { std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); @@ -42,6 +42,10 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); + } else if (const auto command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { + // NVDEC + cdma_pusher.Push(std::move(command_list->entries)); + cdma_pusher.DispatchCalls(); } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { @@ -75,15 +79,19 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, - Tegra::DmaPusher& dma_pusher) { - thread = std::thread{RunThread, std::ref(system), std::ref(renderer), - std::ref(context), std::ref(dma_pusher), std::ref(state)}; + Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { + thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), + std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { PushCommand(SubmitListCommand(std::move(entries))); } +void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) { + PushCommand(SubmitChCommandEntries(std::move(entries))); +} + void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 5a28335d6..32a34e3a7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -37,6 +37,14 @@ struct SubmitListCommand final { Tegra::CommandList entries; }; +/// Command to signal to the GPU thread that a cdma command list is ready for processing +struct SubmitChCommandEntries final { + explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries) + : entries{std::move(entries)} {} + + Tegra::ChCommandHeaderList entries; +}; + /// Command to signal to the GPU thread that a swap buffers is pending struct SwapBuffersCommand final { explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) @@ -77,9 +85,9 @@ struct OnCommandListEndCommand final {}; struct GPUTickCommand final {}; using CommandData = - std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, - InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand, - GPUTickCommand>; + std::variant<EndProcessingCommand, SubmitListCommand, SubmitChCommandEntries, + SwapBuffersCommand, FlushRegionCommand, InvalidateRegionCommand, + FlushAndInvalidateRegionCommand, OnCommandListEndCommand, GPUTickCommand>; struct CommandDataContainer { CommandDataContainer() = default; @@ -109,11 +117,14 @@ public: /// Creates and starts the GPU thread. void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, - Tegra::DmaPusher& dma_pusher); + Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher); /// Push GPU command entries to be processed void SubmitList(Tegra::CommandList&& entries); + /// Push GPU CDMA command buffer entries to be processed + void SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries); + /// Swap buffers (render frame) void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index aa62363a7..c157724a9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,23 +1,16 @@ -set(SHADER_FILES +set(SHADER_SOURCES opengl_present.frag opengl_present.vert ) set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) -set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) - set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) -add_custom_command( - OUTPUT - ${SHADER_DIR} - COMMAND - ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR} -) +set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) -foreach(FILENAME IN ITEMS ${SHADER_FILES}) +foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) string(REPLACE "." "_" SHADER_NAME ${FILENAME}) set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) @@ -29,8 +22,8 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) MAIN_DEPENDENCY ${SOURCE_FILE} DEPENDS - ${HEADER_GENERATOR} ${INPUT_FILE} + # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified ) set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) endforeach() @@ -39,5 +32,5 @@ add_custom_target(host_shaders DEPENDS ${SHADER_HEADERS} SOURCES - ${SHADER_FILES} + ${SHADER_SOURCES} ) diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake index 368bce0ed..c0fc49768 100644 --- a/src/video_core/host_shaders/StringShaderHeader.cmake +++ b/src/video_core/host_shaders/StringShaderHeader.cmake @@ -8,4 +8,6 @@ string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME) file(READ ${SOURCE_FILE} CONTENTS) +get_filename_component(OUTPUT_DIR ${HEADER_FILE} DIRECTORY) +make_directory(${OUTPUT_DIR}) configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY) diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 02cf53d15..6e70bd362 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -11,6 +11,7 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" namespace Tegra { @@ -44,6 +45,12 @@ GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_ return Map(cpu_addr, *FindFreeRange(size, align), size); } +GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { + const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true); + ASSERT(gpu_addr); + return Map(cpu_addr, *gpu_addr, size); +} + void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { if (!size) { return; @@ -108,7 +115,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s page_table[PageEntryIndex(gpu_addr)] = page_entry; } -std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const { +std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align, + bool start_32bit_address) const { if (!align) { align = page_size; } else { @@ -116,7 +124,7 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size } u64 available_size{}; - GPUVAddr gpu_addr{address_space_start}; + GPUVAddr gpu_addr{start_32bit_address ? address_space_start_low : address_space_start}; while (gpu_addr + available_size < address_space_size) { if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) { available_size += page_size; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 53c8d122a..c078193d9 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -116,6 +116,7 @@ public: [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); + [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align); void Unmap(GPUVAddr gpu_addr, std::size_t size); @@ -124,7 +125,8 @@ private: [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); - [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const; + [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align, + bool start_32bit_address = false) const; void TryLockPage(PageEntry page_entry, std::size_t size); void TryUnlockPage(PageEntry page_entry, std::size_t size); @@ -135,6 +137,7 @@ private: static constexpr u64 address_space_size = 1ULL << 40; static constexpr u64 address_space_start = 1ULL << 32; + static constexpr u64 address_space_start_low = 1ULL << 16; static constexpr u64 page_bits{16}; static constexpr u64 page_size{1 << page_bits}; static constexpr u64 page_mask{page_size - 1}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 166ee34e1..70dd0c3c6 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -317,8 +317,7 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo return std::nullopt; } } - - return std::move(entries); + return entries; } void ShaderDiskCacheOpenGL::InvalidateTransferable() { diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index c034558a3..4e83303d8 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -844,7 +844,7 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp VK_SUCCESS) { return std::nullopt; } - return std::move(properties); + return properties; } std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index a14df06a3..dd5cee4a1 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -44,10 +44,11 @@ namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { std::unique_ptr<Tegra::GPU> gpu; + const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - gpu = std::make_unique<VideoCommon::GPUAsynch>(system); + gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec); } else { - gpu = std::make_unique<VideoCommon::GPUSynch>(system); + gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec); } auto context = emu_window.CreateSharedContext(); diff --git a/src/web_service/CMakeLists.txt b/src/web_service/CMakeLists.txt index 7e484b906..ae85a72ea 100644 --- a/src/web_service/CMakeLists.txt +++ b/src/web_service/CMakeLists.txt @@ -9,4 +9,4 @@ add_library(web_service STATIC ) create_target_directory_groups(web_service) -target_link_libraries(web_service PRIVATE common nlohmann_json::nlohmann_json httplib lurlparser) +target_link_libraries(web_service PRIVATE common nlohmann_json::nlohmann_json httplib) diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index 534960d09..67183e64c 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -7,7 +7,6 @@ #include <mutex> #include <string> -#include <LUrlParser.h> #include <fmt/format.h> #include <httplib.h> @@ -19,9 +18,6 @@ namespace WebService { constexpr std::array<const char, 1> API_VERSION{'1'}; -constexpr int HTTP_PORT = 80; -constexpr int HTTPS_PORT = 443; - constexpr std::size_t TIMEOUT_SECONDS = 30; struct Client::Impl { @@ -67,22 +63,14 @@ struct Client::Impl { const std::string& jwt = "", const std::string& username = "", const std::string& token = "") { if (cli == nullptr) { - const auto parsedUrl = LUrlParser::clParseURL::ParseURL(host); - int port{}; - if (parsedUrl.m_Scheme == "http") { - if (!parsedUrl.GetPort(&port)) { - port = HTTP_PORT; - } - } else if (parsedUrl.m_Scheme == "https") { - if (!parsedUrl.GetPort(&port)) { - port = HTTPS_PORT; - } - } else { - LOG_ERROR(WebService, "Bad URL scheme {}", parsedUrl.m_Scheme); - return WebResult{WebResult::Code::InvalidURL, "Bad URL scheme", ""}; - } - cli = std::make_unique<httplib::Client>(parsedUrl.m_Host.c_str(), port); + cli = std::make_unique<httplib::Client>(host.c_str()); + } + + if (!cli->is_valid()) { + LOG_ERROR(WebService, "Client is invalid, skipping request!"); + return {}; } + cli->set_connection_timeout(TIMEOUT_SECONDS); cli->set_read_timeout(TIMEOUT_SECONDS); cli->set_write_timeout(TIMEOUT_SECONDS); diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index cc0291b15..4659e1f89 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -265,9 +265,11 @@ if (MSVC) include(CopyYuzuQt5Deps) include(CopyYuzuSDLDeps) include(CopyYuzuUnicornDeps) + include(CopyYuzuFFmpegDeps) copy_yuzu_Qt5_deps(yuzu) copy_yuzu_SDL_deps(yuzu) copy_yuzu_unicorn_deps(yuzu) + copy_yuzu_FFmpeg_deps(yuzu) endif() if (NOT APPLE) diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp index 2760487a3..c6fa3e4f6 100644 --- a/src/yuzu/applets/controller.cpp +++ b/src/yuzu/applets/controller.cpp @@ -18,15 +18,15 @@ namespace { -constexpr std::array<std::array<bool, 4>, 8> led_patterns = {{ - {1, 0, 0, 0}, - {1, 1, 0, 0}, - {1, 1, 1, 0}, - {1, 1, 1, 1}, - {1, 0, 0, 1}, - {1, 0, 1, 0}, - {1, 0, 1, 1}, - {0, 1, 1, 0}, +constexpr std::array<std::array<bool, 4>, 8> led_patterns{{ + {true, false, false, false}, + {true, true, false, false}, + {true, true, true, false}, + {true, true, true, true}, + {true, false, false, true}, + {true, false, true, false}, + {true, false, true, true}, + {false, true, true, false}, }}; void UpdateController(Settings::ControllerType controller_type, std::size_t npad_index, @@ -589,7 +589,7 @@ QtControllerSelector::QtControllerSelector(GMainWindow& parent) { QtControllerSelector::~QtControllerSelector() = default; void QtControllerSelector::ReconfigureControllers( - std::function<void()> callback, Core::Frontend::ControllerParameters parameters) const { + std::function<void()> callback, const Core::Frontend::ControllerParameters& parameters) const { this->callback = std::move(callback); emit MainWindowReconfigureControllers(parameters); } diff --git a/src/yuzu/applets/controller.h b/src/yuzu/applets/controller.h index 2d6d588c6..729ecc831 100644 --- a/src/yuzu/applets/controller.h +++ b/src/yuzu/applets/controller.h @@ -120,11 +120,13 @@ public: explicit QtControllerSelector(GMainWindow& parent); ~QtControllerSelector() override; - void ReconfigureControllers(std::function<void()> callback, - Core::Frontend::ControllerParameters parameters) const override; + void ReconfigureControllers( + std::function<void()> callback, + const Core::Frontend::ControllerParameters& parameters) const override; signals: - void MainWindowReconfigureControllers(Core::Frontend::ControllerParameters parameters) const; + void MainWindowReconfigureControllers( + const Core::Frontend::ControllerParameters& parameters) const; private: void MainWindowReconfigureFinished(); diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp index dca8835ed..c9a2f8601 100644 --- a/src/yuzu/applets/profile_select.cpp +++ b/src/yuzu/applets/profile_select.cpp @@ -114,6 +114,15 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent) QtProfileSelectionDialog::~QtProfileSelectionDialog() = default; +int QtProfileSelectionDialog::exec() { + // Skip profile selection when there's only one. + if (profile_manager->GetUserCount() == 1) { + user_index = 0; + return QDialog::Accepted; + } + return QDialog::exec(); +} + void QtProfileSelectionDialog::accept() { QDialog::accept(); } diff --git a/src/yuzu/applets/profile_select.h b/src/yuzu/applets/profile_select.h index cee886a77..29c33cca0 100644 --- a/src/yuzu/applets/profile_select.h +++ b/src/yuzu/applets/profile_select.h @@ -27,6 +27,7 @@ public: explicit QtProfileSelectionDialog(QWidget* parent); ~QtProfileSelectionDialog() override; + int exec() override; void accept() override; void reject() override; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index d2913d613..1ce62e4a6 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -515,7 +515,7 @@ void Config::ReadMotionTouchValues() { void Config::ReadCoreValues() { qt_config->beginGroup(QStringLiteral("Core")); - ReadSettingGlobal(Settings::values.use_multi_core, QStringLiteral("use_multi_core"), false); + ReadSettingGlobal(Settings::values.use_multi_core, QStringLiteral("use_multi_core"), true); qt_config->endGroup(); } @@ -716,10 +716,12 @@ void Config::ReadRendererValues() { QStringLiteral("use_disk_shader_cache"), true); ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0); ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, - QStringLiteral("use_asynchronous_gpu_emulation"), false); + QStringLiteral("use_asynchronous_gpu_emulation"), true); + ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), + true); ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), - false); + true); ReadSettingGlobal(Settings::values.use_asynchronous_shaders, QStringLiteral("use_asynchronous_shaders"), false); ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), @@ -1108,7 +1110,7 @@ void Config::SaveControlValues() { void Config::SaveCoreValues() { qt_config->beginGroup(QStringLiteral("Core")); - WriteSettingGlobal(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); + WriteSettingGlobal(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, true); qt_config->endGroup(); } @@ -1264,10 +1266,12 @@ void Config::SaveRendererValues() { static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)), Settings::values.gpu_accuracy.UsingGlobal(), 0); WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), - Settings::values.use_asynchronous_gpu_emulation, false); + Settings::values.use_asynchronous_gpu_emulation, true); + WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, + true); WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), - Settings::values.use_assembly_shaders, false); + Settings::values.use_assembly_shaders, true); WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), Settings::values.use_asynchronous_shaders, false); WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 07d818548..4f083ecda 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -70,9 +70,11 @@ void ConfigureGraphics::SetConfiguration() { ui->api->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); + ui->use_nvdec_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( Settings::values.use_asynchronous_gpu_emulation.GetValue()); + ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); if (Settings::configuring_global) { ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); @@ -116,6 +118,9 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.use_asynchronous_gpu_emulation.SetValue( ui->use_asynchronous_gpu_emulation->isChecked()); } + if (Settings::values.use_nvdec_emulation.UsingGlobal()) { + Settings::values.use_nvdec_emulation.SetValue(ui->use_nvdec_emulation->isChecked()); + } if (Settings::values.bg_red.UsingGlobal()) { Settings::values.bg_red.SetValue(static_cast<float>(bg_color.redF())); Settings::values.bg_green.SetValue(static_cast<float>(bg_color.greenF())); @@ -144,6 +149,8 @@ void ConfigureGraphics::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, ui->use_asynchronous_gpu_emulation, use_asynchronous_gpu_emulation); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, + ui->use_nvdec_emulation, use_nvdec_emulation); if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.bg_red.SetGlobal(true); @@ -240,6 +247,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); + ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); @@ -253,6 +261,8 @@ void ConfigureGraphics::SetupPerGameUI() { ConfigurationShared::SetColoredTristate( ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); + ConfigurationShared::SetColoredTristate( + ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, Settings::values.use_asynchronous_gpu_emulation, use_asynchronous_gpu_emulation); diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index b4961f719..1fefc88eb 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -46,6 +46,7 @@ private: std::unique_ptr<Ui::ConfigureGraphics> ui; QColor bg_color; + ConfigurationShared::CheckState use_nvdec_emulation; ConfigurationShared::CheckState use_disk_shader_cache; ConfigurationShared::CheckState use_asynchronous_gpu_emulation; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 62aa337e7..58486eb1e 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -98,6 +98,13 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_nvdec_emulation"> + <property name="text"> + <string>Use NVDEC emulation</string> + </property> + </widget> + </item> + <item> <widget class="QWidget" name="aspect_ratio_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_6"> <property name="leftMargin"> diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index 9ad43ed8f..5e8e201dc 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/file_util.h" #include "core/core.h" +#include "core/hle/service/time/time.h" #include "core/settings.h" #include "ui_configure_system.h" #include "yuzu/configuration/configuration_shared.h" @@ -104,6 +105,22 @@ void ConfigureSystem::SetConfiguration() { void ConfigureSystem::ReadSystemSettings() {} void ConfigureSystem::ApplyConfiguration() { + // Allow setting custom RTC even if system is powered on, to allow in-game time to be fast + // forwared + if (Settings::values.custom_rtc.UsingGlobal()) { + if (ui->custom_rtc_checkbox->isChecked()) { + Settings::values.custom_rtc.SetValue( + std::chrono::seconds(ui->custom_rtc_edit->dateTime().toSecsSinceEpoch())); + if (Core::System::GetInstance().IsPoweredOn()) { + const s64 posix_time{Settings::values.custom_rtc.GetValue()->count() + + Service::Time::TimeManager::GetExternalTimeZoneOffset()}; + Core::System::GetInstance().GetTimeManager().UpdateLocalSystemClockTime(posix_time); + } + } else { + Settings::values.custom_rtc.SetValue(std::nullopt); + } + } + if (!enabled) { return; } @@ -131,15 +148,6 @@ void ConfigureSystem::ApplyConfiguration() { Settings::values.rng_seed.SetValue(std::nullopt); } } - - if (Settings::values.custom_rtc.UsingGlobal()) { - if (ui->custom_rtc_checkbox->isChecked()) { - Settings::values.custom_rtc.SetValue( - std::chrono::seconds(ui->custom_rtc_edit->dateTime().toSecsSinceEpoch())); - } else { - Settings::values.custom_rtc.SetValue(std::nullopt); - } - } } else { ConfigurationShared::ApplyPerGameSetting(&Settings::values.language_index, ui->combo_language); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index e3de0f0e1..18e68e590 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -303,24 +303,18 @@ void GMainWindow::ControllerSelectorReconfigureControllers( } void GMainWindow::ProfileSelectorSelectProfile() { - const Service::Account::ProfileManager manager; - int index = 0; - if (manager.GetUserCount() != 1) { - QtProfileSelectionDialog dialog(this); - dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowStaysOnTopHint | - Qt::WindowTitleHint | Qt::WindowSystemMenuHint | - Qt::WindowCloseButtonHint); - dialog.setWindowModality(Qt::WindowModal); - - if (dialog.exec() == QDialog::Rejected) { - emit ProfileSelectorFinishedSelection(std::nullopt); - return; - } - - index = dialog.GetIndex(); + QtProfileSelectionDialog dialog(this); + dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowStaysOnTopHint | + Qt::WindowTitleHint | Qt::WindowSystemMenuHint | + Qt::WindowCloseButtonHint); + dialog.setWindowModality(Qt::WindowModal); + if (dialog.exec() == QDialog::Rejected) { + emit ProfileSelectorFinishedSelection(std::nullopt); + return; } - const auto uuid = manager.GetUser(static_cast<std::size_t>(index)); + const Service::Account::ProfileManager manager; + const auto uuid = manager.GetUser(static_cast<std::size_t>(dialog.GetIndex())); if (!uuid.has_value()) { emit ProfileSelectorFinishedSelection(std::nullopt); return; diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 23448e747..334038ef9 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -371,7 +371,7 @@ void Config::ReadValues() { // Core Settings::values.use_multi_core.SetValue( - sdl2_config->GetBoolean("Core", "use_multi_core", false)); + sdl2_config->GetBoolean("Core", "use_multi_core", true)); // Renderer const int renderer_backend = sdl2_config->GetInteger( @@ -395,11 +395,11 @@ void Config::ReadValues() { const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level)); Settings::values.use_asynchronous_gpu_emulation.SetValue( - sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false)); + sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true)); Settings::values.use_vsync.SetValue( static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1))); Settings::values.use_assembly_shaders.SetValue( - sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false)); + sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", true)); Settings::values.use_asynchronous_shaders.SetValue( sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); Settings::values.use_asynchronous_shaders.SetValue( diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index aa9e40380..796e27df4 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -94,7 +94,7 @@ udp_pad_index= [Core] # Whether to use multi-core for CPU emulation -# 0 (default): Disabled, 1: Enabled +# 0: Disabled, 1 (default): Enabled use_multi_core= [Cpu] @@ -163,7 +163,7 @@ max_anisotropy = use_vsync = # Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. -# 0 (default): Off, 1: On +# 0: Off, 1 (default): On use_assembly_shaders = # Whether to allow asynchronous shader building. |
