From 13ed9438fb47d62663fb1ef367baac1a567b25b3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 4 Feb 2020 11:23:12 -0400 Subject: Common: Implement a basic SpinLock class --- src/common/CMakeLists.txt | 2 ++ src/common/spin_lock.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++++++ src/common/spin_lock.h | 20 ++++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 src/common/spin_lock.cpp create mode 100644 src/common/spin_lock.h (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 0a3e2f4d1..c8bf80372 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -143,6 +143,8 @@ add_library(common STATIC scm_rev.cpp scm_rev.h scope_exit.h + spin_lock.cpp + spin_lock.h string_util.cpp string_util.h swap.h diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp new file mode 100644 index 000000000..8077b78d2 --- /dev/null +++ b/src/common/spin_lock.cpp @@ -0,0 +1,46 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/spin_lock.h" + +#if _MSC_VER +#include +#if _M_AMD64 +#define __x86_64__ 1 +#endif +#if _M_ARM64 +#define __aarch64__ 1 +#endif +#else +#if __x86_64__ +#include +#endif +#endif + +namespace { + +void thread_pause() { +#if __x86_64__ + _mm_pause(); +#elif __aarch64__ && _MSC_VER + __yield(); +#elif __aarch64__ + asm("yield"); +#endif +} + +} // namespace + +namespace Common { + +void SpinLock::lock() { + while (lck.test_and_set(std::memory_order_acquire)) + thread_pause(); +} + +void SpinLock::unlock() { + lck.clear(std::memory_order_release); +} + +} // namespace Common diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h new file mode 100644 index 000000000..cbc67b6c8 --- /dev/null +++ b/src/common/spin_lock.h @@ -0,0 +1,20 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Common { + +class SpinLock { +public: + void lock(); + void unlock(); + +private: + std::atomic_flag lck = ATOMIC_FLAG_INIT; +}; + +} // namespace Common -- cgit v1.2.3 From bc266a9d98f38f6fd1006f1ca52bd57e6a7f37d3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 4 Feb 2020 15:06:23 -0400 Subject: Common: Implement a basic Fiber class. --- src/common/CMakeLists.txt | 2 + src/common/fiber.cpp | 147 ++++++++++++++++++++++++++++++++++++++++++++++ src/common/fiber.h | 55 +++++++++++++++++ 3 files changed, 204 insertions(+) create mode 100644 src/common/fiber.cpp create mode 100644 src/common/fiber.h (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index c8bf80372..554d6e253 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -110,6 +110,8 @@ add_library(common STATIC common_types.h dynamic_library.cpp dynamic_library.h + fiber.cpp + fiber.h file_util.cpp file_util.h hash.h diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp new file mode 100644 index 000000000..eb59f1aa9 --- /dev/null +++ b/src/common/fiber.cpp @@ -0,0 +1,147 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/fiber.h" + +namespace Common { + +#ifdef _MSC_VER +#include + +struct Fiber::FiberImpl { + LPVOID handle = nullptr; +}; + +void Fiber::_start([[maybe_unused]] void* parameter) { + guard.lock(); + if (previous_fiber) { + previous_fiber->guard.unlock(); + previous_fiber = nullptr; + } + entry_point(start_parameter); +} + +static void __stdcall FiberStartFunc(LPVOID lpFiberParameter) +{ + auto fiber = static_cast(lpFiberParameter); + fiber->_start(nullptr); +} + +Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) + : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} { + impl = std::make_unique(); + impl->handle = CreateFiber(0, &FiberStartFunc, this); +} + +Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} { + impl = std::make_unique(); +} + +Fiber::~Fiber() { + // Make sure the Fiber is not being used + guard.lock(); + guard.unlock(); + DeleteFiber(impl->handle); +} + +void Fiber::Exit() { + if (!is_thread_fiber) { + return; + } + ConvertFiberToThread(); + guard.unlock(); +} + +void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { + to->guard.lock(); + to->previous_fiber = from; + SwitchToFiber(to->impl->handle); + auto previous_fiber = from->previous_fiber; + if (previous_fiber) { + previous_fiber->guard.unlock(); + previous_fiber.reset(); + } +} + +std::shared_ptr Fiber::ThreadToFiber() { + std::shared_ptr fiber = std::shared_ptr{new Fiber()}; + fiber->guard.lock(); + fiber->impl->handle = ConvertThreadToFiber(NULL); + fiber->is_thread_fiber = true; + return fiber; +} + +#else + +#include + +constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB + +struct Fiber::FiberImpl { + boost::context::detail::fcontext_t context; + std::array stack; +}; + +void Fiber::_start(void* parameter) { + guard.lock(); + boost::context::detail::transfer_t* transfer = static_cast(parameter); + if (previous_fiber) { + previous_fiber->impl->context = transfer->fctx; + previous_fiber->guard.unlock(); + previous_fiber = nullptr; + } + entry_point(start_parameter); +} + +static void FiberStartFunc(boost::context::detail::transfer_t transfer) +{ + auto fiber = static_cast(transfer.data); + fiber->_start(&transfer); +} + +Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) + : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} { + impl = std::make_unique(); + auto start_func = std::bind(&Fiber::start, this); + impl->context = + boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), &start_func); +} + +Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} { + impl = std::make_unique(); +} + +Fiber::~Fiber() { + // Make sure the Fiber is not being used + guard.lock(); + guard.unlock(); +} + +void Fiber::Exit() { + if (!is_thread_fiber) { + return; + } + guard.unlock(); +} + +void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { + to->guard.lock(); + to->previous_fiber = from; + auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr); + auto previous_fiber = from->previous_fiber; + if (previous_fiber) { + previous_fiber->impl->context = transfer.fctx; + previous_fiber->guard.unlock(); + previous_fiber.reset(); + } +} + +std::shared_ptr Fiber::ThreadToFiber() { + std::shared_ptr fiber = std::shared_ptr{new Fiber()}; + fiber->is_thread_fiber = true; + return fiber; +} + +#endif +} // namespace Common diff --git a/src/common/fiber.h b/src/common/fiber.h new file mode 100644 index 000000000..ab44905cf --- /dev/null +++ b/src/common/fiber.h @@ -0,0 +1,55 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "common/spin_lock.h" + +namespace Common { + +class Fiber { +public: + Fiber(std::function&& entry_point_func, void* start_parameter); + ~Fiber(); + + Fiber(const Fiber&) = delete; + Fiber& operator=(const Fiber&) = delete; + + Fiber(Fiber&&) = default; + Fiber& operator=(Fiber&&) = default; + + /// Yields control from Fiber 'from' to Fiber 'to' + /// Fiber 'from' must be the currently running fiber. + static void YieldTo(std::shared_ptr from, std::shared_ptr to); + static std::shared_ptr ThreadToFiber(); + + /// Only call from main thread's fiber + void Exit(); + + /// Used internally but required to be public, Shall not be used + void _start(void* parameter); + + /// Changes the start parameter of the fiber. Has no effect if the fiber already started + void SetStartParameter(void* new_parameter) { + start_parameter = new_parameter; + } + +private: + Fiber(); + + struct FiberImpl; + + SpinLock guard; + std::function entry_point; + void* start_parameter; + std::shared_ptr previous_fiber; + std::unique_ptr impl; + bool is_thread_fiber{}; +}; + +} // namespace Common -- cgit v1.2.3 From 8d0e3c542258cc50081af93aa85e0e3cbf8900c3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 5 Feb 2020 14:13:16 -0400 Subject: Tests: Add tests for fibers and refactor/fix Fiber class --- src/common/fiber.cpp | 32 +++---- src/common/fiber.h | 19 +++- src/tests/CMakeLists.txt | 1 + src/tests/common/fibers.cpp | 214 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 247 insertions(+), 19 deletions(-) create mode 100644 src/tests/common/fibers.cpp (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index eb59f1aa9..a2c0401c4 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -3,18 +3,21 @@ // Refer to the license.txt file included. #include "common/fiber.h" +#ifdef _MSC_VER +#include +#else +#include +#endif namespace Common { #ifdef _MSC_VER -#include struct Fiber::FiberImpl { LPVOID handle = nullptr; }; -void Fiber::_start([[maybe_unused]] void* parameter) { - guard.lock(); +void Fiber::start() { if (previous_fiber) { previous_fiber->guard.unlock(); previous_fiber = nullptr; @@ -22,10 +25,10 @@ void Fiber::_start([[maybe_unused]] void* parameter) { entry_point(start_parameter); } -static void __stdcall FiberStartFunc(LPVOID lpFiberParameter) +void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) { - auto fiber = static_cast(lpFiberParameter); - fiber->_start(nullptr); + auto fiber = static_cast(fiber_parameter); + fiber->start(); } Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) @@ -74,30 +77,26 @@ std::shared_ptr Fiber::ThreadToFiber() { #else -#include - constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB -struct Fiber::FiberImpl { - boost::context::detail::fcontext_t context; +struct alignas(64) Fiber::FiberImpl { std::array stack; + boost::context::detail::fcontext_t context; }; -void Fiber::_start(void* parameter) { - guard.lock(); - boost::context::detail::transfer_t* transfer = static_cast(parameter); +void Fiber::start(boost::context::detail::transfer_t& transfer) { if (previous_fiber) { - previous_fiber->impl->context = transfer->fctx; + previous_fiber->impl->context = transfer.fctx; previous_fiber->guard.unlock(); previous_fiber = nullptr; } entry_point(start_parameter); } -static void FiberStartFunc(boost::context::detail::transfer_t transfer) +void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) { auto fiber = static_cast(transfer.data); - fiber->_start(&transfer); + fiber->start(transfer); } Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) @@ -139,6 +138,7 @@ void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { std::shared_ptr Fiber::ThreadToFiber() { std::shared_ptr fiber = std::shared_ptr{new Fiber()}; + fiber->guard.lock(); fiber->is_thread_fiber = true; return fiber; } diff --git a/src/common/fiber.h b/src/common/fiber.h index ab44905cf..812d6644a 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -10,6 +10,12 @@ #include "common/common_types.h" #include "common/spin_lock.h" +#ifndef _MSC_VER +namespace boost::context::detail { + struct transfer_t; +} +#endif + namespace Common { class Fiber { @@ -31,9 +37,6 @@ public: /// Only call from main thread's fiber void Exit(); - /// Used internally but required to be public, Shall not be used - void _start(void* parameter); - /// Changes the start parameter of the fiber. Has no effect if the fiber already started void SetStartParameter(void* new_parameter) { start_parameter = new_parameter; @@ -42,6 +45,16 @@ public: private: Fiber(); +#ifdef _MSC_VER + void start(); + static void FiberStartFunc(void* fiber_parameter); +#else + void start(boost::context::detail::transfer_t& transfer); + static void FiberStartFunc(boost::context::detail::transfer_t transfer); +#endif + + + struct FiberImpl; SpinLock guard; diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index c7038b217..47ef30aa9 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,6 +1,7 @@ add_executable(tests common/bit_field.cpp common/bit_utils.cpp + common/fibers.cpp common/multi_level_queue.cpp common/param_package.cpp common/ring_buffer.cpp diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp new file mode 100644 index 000000000..ff840afa6 --- /dev/null +++ b/src/tests/common/fibers.cpp @@ -0,0 +1,214 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "common/common_types.h" +#include "common/fiber.h" +#include "common/spin_lock.h" + +namespace Common { + +class TestControl1 { +public: + TestControl1() = default; + + void DoWork(); + + void ExecuteThread(u32 id); + + std::unordered_map ids; + std::vector> thread_fibers; + std::vector> work_fibers; + std::vector items; + std::vector results; +}; + +static void WorkControl1(void* control) { + TestControl1* test_control = static_cast(control); + test_control->DoWork(); +} + +void TestControl1::DoWork() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + u32 value = items[id]; + for (u32 i = 0; i < id; i++) { + value++; + } + results[id] = value; + Fiber::YieldTo(work_fibers[id], thread_fibers[id]); +} + +void TestControl1::ExecuteThread(u32 id) { + std::thread::id this_id = std::this_thread::get_id(); + ids[this_id] = id; + auto thread_fiber = Fiber::ThreadToFiber(); + thread_fibers[id] = thread_fiber; + work_fibers[id] = std::make_shared(std::function{WorkControl1}, this); + items[id] = rand() % 256; + Fiber::YieldTo(thread_fibers[id], work_fibers[id]); + thread_fibers[id]->Exit(); +} + +static void ThreadStart1(u32 id, TestControl1& test_control) { + test_control.ExecuteThread(id); +} + + +TEST_CASE("Fibers::Setup", "[common]") { + constexpr u32 num_threads = 7; + TestControl1 test_control{}; + test_control.thread_fibers.resize(num_threads, nullptr); + test_control.work_fibers.resize(num_threads, nullptr); + test_control.items.resize(num_threads, 0); + test_control.results.resize(num_threads, 0); + std::vector threads; + for (u32 i = 0; i < num_threads; i++) { + threads.emplace_back(ThreadStart1, i, std::ref(test_control)); + } + for (u32 i = 0; i < num_threads; i++) { + threads[i].join(); + } + for (u32 i = 0; i < num_threads; i++) { + REQUIRE(test_control.items[i] + i == test_control.results[i]); + } +} + +class TestControl2 { +public: + TestControl2() = default; + + void DoWork1() { + trap2 = false; + while (trap.load()); + for (u32 i = 0; i < 12000; i++) { + value1 += i; + } + Fiber::YieldTo(fiber1, fiber3); + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + assert1 = id == 1; + value2 += 5000; + Fiber::YieldTo(fiber1, thread_fibers[id]); + } + + void DoWork2() { + while (trap2.load()); + value2 = 2000; + trap = false; + Fiber::YieldTo(fiber2, fiber1); + assert3 = false; + } + + void DoWork3() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + assert2 = id == 0; + value1 += 1000; + Fiber::YieldTo(fiber3, thread_fibers[id]); + } + + void ExecuteThread(u32 id); + + void CallFiber1() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + Fiber::YieldTo(thread_fibers[id], fiber1); + } + + void CallFiber2() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + Fiber::YieldTo(thread_fibers[id], fiber2); + } + + void Exit(); + + bool assert1{}; + bool assert2{}; + bool assert3{true}; + u32 value1{}; + u32 value2{}; + std::atomic trap{true}; + std::atomic trap2{true}; + std::unordered_map ids; + std::vector> thread_fibers; + std::shared_ptr fiber1; + std::shared_ptr fiber2; + std::shared_ptr fiber3; +}; + +static void WorkControl2_1(void* control) { + TestControl2* test_control = static_cast(control); + test_control->DoWork1(); +} + +static void WorkControl2_2(void* control) { + TestControl2* test_control = static_cast(control); + test_control->DoWork2(); +} + +static void WorkControl2_3(void* control) { + TestControl2* test_control = static_cast(control); + test_control->DoWork3(); +} + +void TestControl2::ExecuteThread(u32 id) { + std::thread::id this_id = std::this_thread::get_id(); + ids[this_id] = id; + auto thread_fiber = Fiber::ThreadToFiber(); + thread_fibers[id] = thread_fiber; +} + +void TestControl2::Exit() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + thread_fibers[id]->Exit(); +} + +static void ThreadStart2_1(u32 id, TestControl2& test_control) { + test_control.ExecuteThread(id); + test_control.CallFiber1(); + test_control.Exit(); +} + +static void ThreadStart2_2(u32 id, TestControl2& test_control) { + test_control.ExecuteThread(id); + test_control.CallFiber2(); + test_control.Exit(); +} + +TEST_CASE("Fibers::InterExchange", "[common]") { + TestControl2 test_control{}; + test_control.thread_fibers.resize(2, nullptr); + test_control.fiber1 = std::make_shared(std::function{WorkControl2_1}, &test_control); + test_control.fiber2 = std::make_shared(std::function{WorkControl2_2}, &test_control); + test_control.fiber3 = std::make_shared(std::function{WorkControl2_3}, &test_control); + std::thread thread1(ThreadStart2_1, 0, std::ref(test_control)); + std::thread thread2(ThreadStart2_2, 1, std::ref(test_control)); + thread1.join(); + thread2.join(); + REQUIRE(test_control.assert1); + REQUIRE(test_control.assert2); + REQUIRE(test_control.assert3); + REQUIRE(test_control.value2 == 7000); + u32 cal_value = 0; + for (u32 i = 0; i < 12000; i++) { + cal_value += i; + } + cal_value += 1000; + REQUIRE(test_control.value1 == cal_value); +} + + +} // namespace Common -- cgit v1.2.3 From be320a9e10fda32a984b12cdfe3aaf09cc67b39a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 5 Feb 2020 15:48:20 -0400 Subject: Common: Polish Fiber class, add comments, asserts and more tests. --- src/common/fiber.cpp | 55 +++++++++++++++----------- src/common/fiber.h | 14 ++++++- src/common/spin_lock.cpp | 7 ++++ src/common/spin_lock.h | 1 + src/tests/common/fibers.cpp | 95 ++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 147 insertions(+), 25 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index a2c0401c4..a88a30ced 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/assert.h" #include "common/fiber.h" #ifdef _MSC_VER #include @@ -18,11 +19,11 @@ struct Fiber::FiberImpl { }; void Fiber::start() { - if (previous_fiber) { - previous_fiber->guard.unlock(); - previous_fiber = nullptr; - } + ASSERT(previous_fiber != nullptr); + previous_fiber->guard.unlock(); + previous_fiber.reset(); entry_point(start_parameter); + UNREACHABLE(); } void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) @@ -43,12 +44,16 @@ Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} { Fiber::~Fiber() { // Make sure the Fiber is not being used - guard.lock(); - guard.unlock(); + bool locked = guard.try_lock(); + ASSERT_MSG(locked, "Destroying a fiber that's still running"); + if (locked) { + guard.unlock(); + } DeleteFiber(impl->handle); } void Fiber::Exit() { + ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber"); if (!is_thread_fiber) { return; } @@ -57,14 +62,15 @@ void Fiber::Exit() { } void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { + ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); + ASSERT_MSG(to != nullptr, "Next fiber is null!"); to->guard.lock(); to->previous_fiber = from; SwitchToFiber(to->impl->handle); auto previous_fiber = from->previous_fiber; - if (previous_fiber) { - previous_fiber->guard.unlock(); - previous_fiber.reset(); - } + ASSERT(previous_fiber != nullptr); + previous_fiber->guard.unlock(); + previous_fiber.reset(); } std::shared_ptr Fiber::ThreadToFiber() { @@ -85,12 +91,12 @@ struct alignas(64) Fiber::FiberImpl { }; void Fiber::start(boost::context::detail::transfer_t& transfer) { - if (previous_fiber) { - previous_fiber->impl->context = transfer.fctx; - previous_fiber->guard.unlock(); - previous_fiber = nullptr; - } + ASSERT(previous_fiber != nullptr); + previous_fiber->impl->context = transfer.fctx; + previous_fiber->guard.unlock(); + previous_fiber.reset(); entry_point(start_parameter); + UNREACHABLE(); } void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) @@ -113,11 +119,15 @@ Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} { Fiber::~Fiber() { // Make sure the Fiber is not being used - guard.lock(); - guard.unlock(); + bool locked = guard.try_lock(); + ASSERT_MSG(locked, "Destroying a fiber that's still running"); + if (locked) { + guard.unlock(); + } } void Fiber::Exit() { + ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber"); if (!is_thread_fiber) { return; } @@ -125,15 +135,16 @@ void Fiber::Exit() { } void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { + ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); + ASSERT_MSG(to != nullptr, "Next fiber is null!"); to->guard.lock(); to->previous_fiber = from; auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr); auto previous_fiber = from->previous_fiber; - if (previous_fiber) { - previous_fiber->impl->context = transfer.fctx; - previous_fiber->guard.unlock(); - previous_fiber.reset(); - } + ASSERT(previous_fiber != nullptr); + previous_fiber->impl->context = transfer.fctx; + previous_fiber->guard.unlock(); + previous_fiber.reset(); } std::shared_ptr Fiber::ThreadToFiber() { diff --git a/src/common/fiber.h b/src/common/fiber.h index 812d6644a..89a01fdd8 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -18,6 +18,18 @@ namespace boost::context::detail { namespace Common { +/** + * Fiber class + * a fiber is a userspace thread with it's own context. They can be used to + * implement coroutines, emulated threading systems and certain asynchronous + * patterns. + * + * This class implements fibers at a low level, thus allowing greater freedom + * to implement such patterns. This fiber class is 'threadsafe' only one fiber + * can be running at a time and threads will be locked while trying to yield to + * a running fiber until it yields. WARNING exchanging two running fibers between + * threads will cause a deadlock. + */ class Fiber { public: Fiber(std::function&& entry_point_func, void* start_parameter); @@ -53,8 +65,6 @@ private: static void FiberStartFunc(boost::context::detail::transfer_t transfer); #endif - - struct FiberImpl; SpinLock guard; diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp index 8077b78d2..82a1d39ff 100644 --- a/src/common/spin_lock.cpp +++ b/src/common/spin_lock.cpp @@ -43,4 +43,11 @@ void SpinLock::unlock() { lck.clear(std::memory_order_release); } +bool SpinLock::try_lock() { + if (lck.test_and_set(std::memory_order_acquire)) { + return false; + } + return true; +} + } // namespace Common diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h index cbc67b6c8..70282a961 100644 --- a/src/common/spin_lock.h +++ b/src/common/spin_lock.h @@ -12,6 +12,7 @@ class SpinLock { public: void lock(); void unlock(); + bool try_lock(); private: std::atomic_flag lck = ATOMIC_FLAG_INIT; diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index ff840afa6..358393a19 100644 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp @@ -64,7 +64,9 @@ static void ThreadStart1(u32 id, TestControl1& test_control) { test_control.ExecuteThread(id); } - +/** This test checks for fiber setup configuration and validates that fibers are + * doing all the work required. + */ TEST_CASE("Fibers::Setup", "[common]") { constexpr u32 num_threads = 7; TestControl1 test_control{}; @@ -188,6 +190,10 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) { test_control.Exit(); } +/** This test checks for fiber thread exchange configuration and validates that fibers are + * that a fiber has been succesfully transfered from one thread to another and that the TLS + * region of the thread is kept while changing fibers. + */ TEST_CASE("Fibers::InterExchange", "[common]") { TestControl2 test_control{}; test_control.thread_fibers.resize(2, nullptr); @@ -210,5 +216,92 @@ TEST_CASE("Fibers::InterExchange", "[common]") { REQUIRE(test_control.value1 == cal_value); } +class TestControl3 { +public: + TestControl3() = default; + + void DoWork1() { + value1 += 1; + Fiber::YieldTo(fiber1, fiber2); + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + value3 += 1; + Fiber::YieldTo(fiber1, thread_fibers[id]); + } + + void DoWork2() { + value2 += 1; + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + Fiber::YieldTo(fiber2, thread_fibers[id]); + } + + void ExecuteThread(u32 id); + + void CallFiber1() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + Fiber::YieldTo(thread_fibers[id], fiber1); + } + + void Exit(); + + u32 value1{}; + u32 value2{}; + u32 value3{}; + std::unordered_map ids; + std::vector> thread_fibers; + std::shared_ptr fiber1; + std::shared_ptr fiber2; +}; + +static void WorkControl3_1(void* control) { + TestControl3* test_control = static_cast(control); + test_control->DoWork1(); +} + +static void WorkControl3_2(void* control) { + TestControl3* test_control = static_cast(control); + test_control->DoWork2(); +} + +void TestControl3::ExecuteThread(u32 id) { + std::thread::id this_id = std::this_thread::get_id(); + ids[this_id] = id; + auto thread_fiber = Fiber::ThreadToFiber(); + thread_fibers[id] = thread_fiber; +} + +void TestControl3::Exit() { + std::thread::id this_id = std::this_thread::get_id(); + u32 id = ids[this_id]; + thread_fibers[id]->Exit(); +} + +static void ThreadStart3(u32 id, TestControl3& test_control) { + test_control.ExecuteThread(id); + test_control.CallFiber1(); + test_control.Exit(); +} + +/** This test checks for one two threads racing for starting the same fiber. + * It checks execution occured in an ordered manner and by no time there were + * two contexts at the same time. + */ +TEST_CASE("Fibers::StartRace", "[common]") { + TestControl3 test_control{}; + test_control.thread_fibers.resize(2, nullptr); + test_control.fiber1 = std::make_shared(std::function{WorkControl3_1}, &test_control); + test_control.fiber2 = std::make_shared(std::function{WorkControl3_2}, &test_control); + std::thread thread1(ThreadStart3, 0, std::ref(test_control)); + std::thread thread2(ThreadStart3, 1, std::ref(test_control)); + thread1.join(); + thread2.join(); + REQUIRE(test_control.value1 == 1); + REQUIRE(test_control.value2 == 1); + REQUIRE(test_control.value3 == 1); +} + + } // namespace Common -- cgit v1.2.3 From 0f8e5a146563d1f245f8f62cb931dc1e0b55de2f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 8 Feb 2020 12:48:57 -0400 Subject: Tests: Add base tests to host timing --- src/common/thread.h | 4 +- src/core/host_timing.cpp | 101 +++++++++++++++++---------- src/core/host_timing.h | 30 +++++++-- src/tests/CMakeLists.txt | 1 + src/tests/core/host_timing.cpp | 150 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 243 insertions(+), 43 deletions(-) create mode 100644 src/tests/core/host_timing.cpp (limited to 'src/common') diff --git a/src/common/thread.h b/src/common/thread.h index 2fc071685..127cc7e23 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -9,6 +9,7 @@ #include #include #include +#include "common/common_types.h" namespace Common { @@ -28,8 +29,7 @@ public: is_set = false; } - template - bool WaitFor(const std::chrono::duration& time) { + bool WaitFor(const std::chrono::nanoseconds& time) { std::unique_lock lk{mutex}; if (!condvar.wait_for(lk, time, [this] { return is_set; })) return false; diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp index c02f571c6..d9514b2c5 100644 --- a/src/core/host_timing.cpp +++ b/src/core/host_timing.cpp @@ -10,7 +10,6 @@ #include #include "common/assert.h" -#include "common/thread.h" #include "core/core_timing_util.h" namespace Core::HostTiming { @@ -47,39 +46,55 @@ void CoreTiming::Initialize() { event_fifo_id = 0; const auto empty_timed_callback = [](u64, s64) {}; ev_lost = CreateEvent("_lost_event", empty_timed_callback); - start_time = std::chrono::system_clock::now(); + start_time = std::chrono::steady_clock::now(); timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); } void CoreTiming::Shutdown() { - std::unique_lock guard(inner_mutex); + paused = true; shutting_down = true; - if (!is_set) { - is_set = true; - condvar.notify_one(); - } - inner_mutex.unlock(); + event.Set(); timer_thread->join(); ClearPendingEvents(); + timer_thread.reset(); + has_started = false; +} + +void CoreTiming::Pause(bool is_paused) { + paused = is_paused; +} + +void CoreTiming::SyncPause(bool is_paused) { + if (is_paused == paused && paused_set == paused) { + return; + } + Pause(is_paused); + event.Set(); + while (paused_set != is_paused); +} + +bool CoreTiming::IsRunning() { + return !paused_set; +} + +bool CoreTiming::HasPendingEvents() { + return !(wait_set && event_queue.empty()); } void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr& event_type, u64 userdata) { - std::lock_guard guard{inner_mutex}; + basic_lock.lock(); const u64 timeout = static_cast(GetGlobalTimeNs().count() + ns_into_future); event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); - if (!is_set) { - is_set = true; - condvar.notify_one(); - } + basic_lock.unlock(); + event.Set(); } void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u64 userdata) { - std::lock_guard guard{inner_mutex}; - + basic_lock.lock(); const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get() && e.userdata == userdata; }); @@ -89,6 +104,7 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u event_queue.erase(itr, event_queue.end()); std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); } + basic_lock.unlock(); } u64 CoreTiming::GetCPUTicks() const { @@ -106,7 +122,7 @@ void CoreTiming::ClearPendingEvents() { } void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { - std::lock_guard guard{inner_mutex}; + basic_lock.lock(); const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get(); @@ -117,43 +133,54 @@ void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { event_queue.erase(itr, event_queue.end()); std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); } + basic_lock.unlock(); } void CoreTiming::Advance() { - while (true) { - std::unique_lock guard(inner_mutex); - - global_timer = GetGlobalTimeNs().count(); - - while (!event_queue.empty() && event_queue.front().time <= global_timer) { - Event evt = std::move(event_queue.front()); - std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); - event_queue.pop_back(); - inner_mutex.unlock(); + has_started = true; + while (!shutting_down) { + while (!paused) { + paused_set = false; + basic_lock.lock(); + global_timer = GetGlobalTimeNs().count(); + + while (!event_queue.empty() && event_queue.front().time <= global_timer) { + Event evt = std::move(event_queue.front()); + std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + event_queue.pop_back(); + basic_lock.unlock(); + + if (auto event_type{evt.type.lock()}) { + event_type->callback(evt.userdata, global_timer - evt.time); + } + + basic_lock.lock(); + } - if (auto event_type{evt.type.lock()}) { - event_type->callback(evt.userdata, global_timer - evt.time); + if (!event_queue.empty()) { + std::chrono::nanoseconds next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer); + basic_lock.unlock(); + event.WaitFor(next_time); + } else { + basic_lock.unlock(); + wait_set = true; + event.Wait(); } - inner_mutex.lock(); - } - auto next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer); - condvar.wait_for(guard, next_time, [this] { return is_set; }); - is_set = false; - if (shutting_down) { - break; + wait_set = false; } + paused_set = true; } } std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { - sys_time_point current = std::chrono::system_clock::now(); + sys_time_point current = std::chrono::steady_clock::now(); auto elapsed = current - start_time; return std::chrono::duration_cast(elapsed); } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - sys_time_point current = std::chrono::system_clock::now(); + sys_time_point current = std::chrono::steady_clock::now(); auto elapsed = current - start_time; return std::chrono::duration_cast(elapsed); } diff --git a/src/core/host_timing.h b/src/core/host_timing.h index a3a32e087..1d053a7fa 100644 --- a/src/core/host_timing.h +++ b/src/core/host_timing.h @@ -14,13 +14,15 @@ #include #include "common/common_types.h" +#include "common/spin_lock.h" +#include "common/thread.h" #include "common/threadsafe_queue.h" namespace Core::HostTiming { /// A callback that may be scheduled for a particular core timing event. using TimedCallback = std::function; -using sys_time_point = std::chrono::time_point; +using sys_time_point = std::chrono::time_point; /// Contains the characteristics of a particular event. struct EventType { @@ -63,6 +65,23 @@ public: /// Tears down all timing related functionality. void Shutdown(); + /// Pauses/Unpauses the execution of the timer thread. + void Pause(bool is_paused); + + /// Pauses/Unpauses the execution of the timer thread and waits until paused. + void SyncPause(bool is_paused); + + /// Checks if core timing is running. + bool IsRunning(); + + /// Checks if the timer thread has started. + bool HasStarted() { + return has_started; + } + + /// Checks if there are any pending time events. + bool HasPendingEvents(); + /// Schedules an event in core timing void ScheduleEvent(s64 ns_into_future, const std::shared_ptr& event_type, u64 userdata = 0); @@ -107,11 +126,14 @@ private: u64 event_fifo_id = 0; std::shared_ptr ev_lost; - bool is_set = false; - std::condition_variable condvar; - std::mutex inner_mutex; + Common::Event event{}; + Common::SpinLock basic_lock{}; std::unique_ptr timer_thread; + std::atomic paused{}; + std::atomic paused_set{}; + std::atomic wait_set{}; std::atomic shutting_down{}; + std::atomic has_started{}; }; /// Creates a core timing event with the given name and callback. diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 47ef30aa9..3f750b51c 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -8,6 +8,7 @@ add_executable(tests core/arm/arm_test_common.cpp core/arm/arm_test_common.h core/core_timing.cpp + core/host_timing.cpp tests.cpp ) diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp new file mode 100644 index 000000000..ca9c8e50a --- /dev/null +++ b/src/tests/core/host_timing.cpp @@ -0,0 +1,150 @@ +// Copyright 2016 Dolphin Emulator Project / 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include +#include +#include +#include +#include + +#include "common/file_util.h" +#include "core/core.h" +#include "core/host_timing.h" + +// Numbers are chosen randomly to make sure the correct one is given. +static constexpr std::array CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; +static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals +static constexpr std::array calls_order{{2,0,1,4,3}}; +static std::array delays{}; + +static std::bitset callbacks_ran_flags; +static u64 expected_callback = 0; +static s64 lateness = 0; + +template +void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) { + static_assert(IDX < CB_IDS.size(), "IDX out of range"); + callbacks_ran_flags.set(IDX); + REQUIRE(CB_IDS[IDX] == userdata); + REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]); + delays[IDX] = nanoseconds_late; + ++expected_callback; +} + +static u64 callbacks_done = 0; + +struct ScopeInit final { + ScopeInit() { + core_timing.Initialize(); + } + ~ScopeInit() { + core_timing.Shutdown(); + } + + Core::HostTiming::CoreTiming core_timing; +}; + +TEST_CASE("HostTiming[BasicOrder]", "[core]") { + ScopeInit guard; + auto& core_timing = guard.core_timing; + std::vector> events; + events.resize(5); + events[0] = + Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>); + events[1] = + Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>); + events[2] = + Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>); + events[3] = + Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>); + events[4] = + Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>); + + expected_callback = 0; + + core_timing.SyncPause(true); + + u64 one_micro = 1000U; + for (std::size_t i = 0; i < events.size(); i++) { + u64 order = calls_order[i]; + core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]); + } + /// test pause + REQUIRE(callbacks_ran_flags.none()); + + core_timing.Pause(false); // No need to sync + + while (core_timing.HasPendingEvents()); + + REQUIRE(callbacks_ran_flags.all()); + + for (std::size_t i = 0; i < delays.size(); i++) { + const double delay = static_cast(delays[i]); + const double micro = delay / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili); + } +} + +#pragma optimize("", off) +u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) { + u64 start = core_timing.GetGlobalTimeNs().count(); + u64 placebo = 0; + for (std::size_t i = 0; i < 1000; i++) { + placebo += core_timing.GetGlobalTimeNs().count(); + } + u64 end = core_timing.GetGlobalTimeNs().count(); + return (end - start); +} +#pragma optimize("", on) + +TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") { + ScopeInit guard; + auto& core_timing = guard.core_timing; + std::vector> events; + events.resize(5); + events[0] = + Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>); + events[1] = + Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>); + events[2] = + Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>); + events[3] = + Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>); + events[4] = + Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>); + + core_timing.SyncPause(true); + core_timing.SyncPause(false); + + expected_callback = 0; + + u64 start = core_timing.GetGlobalTimeNs().count(); + u64 one_micro = 1000U; + for (std::size_t i = 0; i < events.size(); i++) { + u64 order = calls_order[i]; + core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]); + } + u64 end = core_timing.GetGlobalTimeNs().count(); + const double scheduling_time = static_cast(end - start); + const double timer_time = static_cast(TestTimerSpeed(core_timing)); + + while (core_timing.HasPendingEvents()); + + REQUIRE(callbacks_ran_flags.all()); + + for (std::size_t i = 0; i < delays.size(); i++) { + const double delay = static_cast(delays[i]); + const double micro = delay / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili); + } + + const double micro = scheduling_time / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili); + printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, timer_time / 1000000.f); +} -- cgit v1.2.3 From 234b5ff6a999d7d69cdcdf214e0c3984cdab11cf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 9 Feb 2020 16:53:22 -0400 Subject: Common: Implement WallClock Interface and implement a native clock for x64 --- src/common/CMakeLists.txt | 4 ++ src/common/wall_clock.cpp | 90 ++++++++++++++++++++++++++++ src/common/wall_clock.h | 40 +++++++++++++ src/common/x64/cpu_detect.cpp | 33 +++++++++++ src/common/x64/cpu_detect.h | 12 ++++ src/common/x64/native_clock.cpp | 128 ++++++++++++++++++++++++++++++++++++++++ src/common/x64/native_clock.h | 41 +++++++++++++ src/core/host_timing.cpp | 21 +++---- src/core/host_timing.h | 4 +- src/tests/core/host_timing.cpp | 45 ++++++-------- 10 files changed, 378 insertions(+), 40 deletions(-) create mode 100644 src/common/wall_clock.cpp create mode 100644 src/common/wall_clock.h create mode 100644 src/common/x64/native_clock.cpp create mode 100644 src/common/x64/native_clock.h (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 554d6e253..aacea0ab7 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -167,6 +167,8 @@ add_library(common STATIC vector_math.h virtual_buffer.cpp virtual_buffer.h + wall_clock.cpp + wall_clock.h web_result.h zstd_compression.cpp zstd_compression.h @@ -177,6 +179,8 @@ if(ARCHITECTURE_x86_64) PRIVATE x64/cpu_detect.cpp x64/cpu_detect.h + x64/native_clock.cpp + x64/native_clock.h x64/xbyak_abi.h x64/xbyak_util.h ) diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp new file mode 100644 index 000000000..eabbba9da --- /dev/null +++ b/src/common/wall_clock.cpp @@ -0,0 +1,90 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/uint128.h" +#include "common/wall_clock.h" + +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/cpu_detect.h" +#include "common/x64/native_clock.h" +#endif + +namespace Common { + +using base_timer = std::chrono::steady_clock; +using base_time_point = std::chrono::time_point; + +class StandardWallClock : public WallClock { +public: + StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency) + : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false) { + start_time = base_timer::now(); + } + + std::chrono::nanoseconds GetTimeNS() override { + base_time_point current = base_timer::now(); + auto elapsed = current - start_time; + return std::chrono::duration_cast(elapsed); + } + + std::chrono::microseconds GetTimeUS() override { + base_time_point current = base_timer::now(); + auto elapsed = current - start_time; + return std::chrono::duration_cast(elapsed); + } + + std::chrono::milliseconds GetTimeMS() override { + base_time_point current = base_timer::now(); + auto elapsed = current - start_time; + return std::chrono::duration_cast(elapsed); + } + + u64 GetClockCycles() override { + std::chrono::nanoseconds time_now = GetTimeNS(); + const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); + return Common::Divide128On32(temporal, 1000000000).first; + } + + u64 GetCPUCycles() override { + std::chrono::nanoseconds time_now = GetTimeNS(); + const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); + return Common::Divide128On32(temporal, 1000000000).first; + } + +private: + base_time_point start_time; +}; + +#ifdef ARCHITECTURE_x86_64 + +WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { + const auto& caps = GetCPUCaps(); + u64 rtsc_frequency = 0; + if (caps.invariant_tsc) { + if (caps.base_frequency != 0) { + rtsc_frequency = static_cast(caps.base_frequency) * 1000000U; + } + if (rtsc_frequency == 0) { + rtsc_frequency = EstimateRDTSCFrequency(); + } + } + if (rtsc_frequency == 0) { + return static_cast( + new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency)); + } else { + return static_cast( + new X64::NativeClock(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency)); + } +} + +#else + +WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { + return static_cast( + new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency)); +} + +#endif + +} // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h new file mode 100644 index 000000000..6f763d74b --- /dev/null +++ b/src/common/wall_clock.h @@ -0,0 +1,40 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Common { + +class WallClock { +public: + virtual std::chrono::nanoseconds GetTimeNS() = 0; + virtual std::chrono::microseconds GetTimeUS() = 0; + virtual std::chrono::milliseconds GetTimeMS() = 0; + virtual u64 GetClockCycles() = 0; + virtual u64 GetCPUCycles() = 0; + + /// Tells if the wall clock, uses the host CPU's hardware clock + bool IsNative() const { + return is_native; + } + +protected: + WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native) + : emulated_cpu_frequency{emulated_cpu_frequency}, + emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {} + + u64 emulated_cpu_frequency; + u64 emulated_clock_frequency; + +private: + bool is_native; +}; + +WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency); + +} // namespace Common diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..d767c544c 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -62,6 +62,17 @@ static CPUCaps Detect() { std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); + if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) + caps.manufacturer = Manufacturer::Intel; + else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) + caps.manufacturer = Manufacturer::AMD; + else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) + caps.manufacturer = Manufacturer::Hygon; + else + caps.manufacturer = Manufacturer::Unknown; + + u32 family = {}; + u32 model = {}; __cpuid(cpu_id, 0x80000000); @@ -73,6 +84,14 @@ static CPUCaps Detect() { // Detect family and other miscellaneous features if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); + family = (cpu_id[0] >> 8) & 0xf; + model = (cpu_id[0] >> 4) & 0xf; + if (family == 0xf) { + family += (cpu_id[0] >> 20) & 0xff; + } + if (family >= 6) { + model += ((cpu_id[0] >> 16) & 0xf) << 4; + } if ((cpu_id[3] >> 25) & 1) caps.sse = true; @@ -130,6 +149,20 @@ static CPUCaps Detect() { caps.fma4 = true; } + if (max_ex_fn >= 0x80000007) { + __cpuid(cpu_id, 0x80000007); + if (cpu_id[3] & (1 << 8)) { + caps.invariant_tsc = true; + } + } + + if (max_std_fn >= 0x16) { + __cpuid(cpu_id, 0x16); + caps.base_frequency = cpu_id[0]; + caps.max_frequency = cpu_id[1]; + caps.bus_frequency = cpu_id[2]; + } + return caps; } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..f0676fa5e 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -6,8 +6,16 @@ namespace Common { +enum class Manufacturer : u32 { + Intel = 0, + AMD = 1, + Hygon = 2, + Unknown = 3, +}; + /// x86/x64 CPU capabilities that may be detected by this module struct CPUCaps { + Manufacturer manufacturer; char cpu_string[0x21]; char brand_string[0x41]; bool sse; @@ -24,6 +32,10 @@ struct CPUCaps { bool fma; bool fma4; bool aes; + bool invariant_tsc; + u32 base_frequency; + u32 max_frequency; + u32 bus_frequency; }; /** diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp new file mode 100644 index 000000000..c799111fd --- /dev/null +++ b/src/common/x64/native_clock.cpp @@ -0,0 +1,128 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "common/x64/native_clock.h" + +namespace Common { + +#ifdef _MSC_VER + +namespace { + +struct uint128 { + u64 low; + u64 high; +}; + +u64 umuldiv64(u64 a, u64 b, u64 d) { + uint128 r{}; + r.low = _umul128(a, b, &r.high); + u64 remainder; + return _udiv128(r.high, r.low, d, &remainder); +} + +} // namespace + +#else + +namespace { + +u64 umuldiv64(u64 a, u64 b, u64 d) { + const u64 diva = a / d; + const u64 moda = a % d; + const u64 divb = b / d; + const u64 modb = b % d; + return diva * b + moda * divb + moda * modb / d; +} + +} // namespace + +#endif + +u64 EstimateRDTSCFrequency() { + const auto milli_10 = std::chrono::milliseconds{10}; + // get current time + _mm_mfence(); + const u64 tscStart = __rdtsc(); + const auto startTime = std::chrono::high_resolution_clock::now(); + // wait roughly 3 seconds + while (true) { + auto milli = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - startTime); + if (milli.count() >= 3000) + break; + std::this_thread::sleep_for(milli_10); + } + const auto endTime = std::chrono::high_resolution_clock::now(); + _mm_mfence(); + const u64 tscEnd = __rdtsc(); + // calculate difference + const u64 timer_diff = + std::chrono::duration_cast(endTime - startTime).count(); + const u64 tsc_diff = tscEnd - tscStart; + const u64 tsc_freq = umuldiv64(tsc_diff, 1000000000ULL, timer_diff); + return tsc_freq; +} + +namespace X64 { +NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, + u64 rtsc_frequency) + : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{ + rtsc_frequency} { + _mm_mfence(); + last_measure = __rdtsc(); + accumulated_ticks = 0U; +} + +u64 NativeClock::GetRTSC() { + rtsc_serialize.lock(); + _mm_mfence(); + const u64 current_measure = __rdtsc(); + u64 diff = current_measure - last_measure; + diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) + if (current_measure > last_measure) { + last_measure = current_measure; + } + accumulated_ticks += diff; + rtsc_serialize.unlock(); + return accumulated_ticks; +} + +std::chrono::nanoseconds NativeClock::GetTimeNS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)}; +} + +std::chrono::microseconds NativeClock::GetTimeUS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)}; +} + +std::chrono::milliseconds NativeClock::GetTimeMS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)}; +} + +u64 NativeClock::GetClockCycles() { + const u64 rtsc_value = GetRTSC(); + return umuldiv64(rtsc_value, emulated_clock_frequency, rtsc_frequency); +} + +u64 NativeClock::GetCPUCycles() { + const u64 rtsc_value = GetRTSC(); + return umuldiv64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); +} + +} // namespace X64 + +} // namespace Common diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h new file mode 100644 index 000000000..b58cf9f5a --- /dev/null +++ b/src/common/x64/native_clock.h @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/spin_lock.h" +#include "common/wall_clock.h" + +namespace Common { + +namespace X64 { +class NativeClock : public WallClock { +public: + NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency); + + std::chrono::nanoseconds GetTimeNS() override; + + std::chrono::microseconds GetTimeUS() override; + + std::chrono::milliseconds GetTimeMS() override; + + u64 GetClockCycles() override; + + u64 GetCPUCycles() override; + +private: + u64 GetRTSC(); + + SpinLock rtsc_serialize{}; + u64 last_measure{}; + u64 accumulated_ticks{}; + u64 rtsc_frequency; +}; +} // namespace X64 + +u64 EstimateRDTSCFrequency(); + +} // namespace Common diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp index d9514b2c5..ef9977b76 100644 --- a/src/core/host_timing.cpp +++ b/src/core/host_timing.cpp @@ -35,7 +35,11 @@ struct CoreTiming::Event { } }; -CoreTiming::CoreTiming() = default; +CoreTiming::CoreTiming() { + Common::WallClock* wall = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ); + clock = std::unique_ptr(wall); +} + CoreTiming::~CoreTiming() = default; void CoreTiming::ThreadEntry(CoreTiming& instance) { @@ -46,7 +50,6 @@ void CoreTiming::Initialize() { event_fifo_id = 0; const auto empty_timed_callback = [](u64, s64) {}; ev_lost = CreateEvent("_lost_event", empty_timed_callback); - start_time = std::chrono::steady_clock::now(); timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); } @@ -108,13 +111,11 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u } u64 CoreTiming::GetCPUTicks() const { - std::chrono::nanoseconds time_now = GetGlobalTimeNs(); - return Core::Timing::nsToCycles(time_now); + return clock->GetCPUCycles(); } u64 CoreTiming::GetClockTicks() const { - std::chrono::nanoseconds time_now = GetGlobalTimeNs(); - return Core::Timing::nsToClockCycles(time_now); + return clock->GetClockCycles(); } void CoreTiming::ClearPendingEvents() { @@ -174,15 +175,11 @@ void CoreTiming::Advance() { } std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { - sys_time_point current = std::chrono::steady_clock::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return clock->GetTimeNS(); } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - sys_time_point current = std::chrono::steady_clock::now(); - auto elapsed = current - start_time; - return std::chrono::duration_cast(elapsed); + return clock->GetTimeUS(); } } // namespace Core::Timing diff --git a/src/core/host_timing.h b/src/core/host_timing.h index 1d053a7fa..f04a150ee 100644 --- a/src/core/host_timing.h +++ b/src/core/host_timing.h @@ -17,12 +17,12 @@ #include "common/spin_lock.h" #include "common/thread.h" #include "common/threadsafe_queue.h" +#include "common/wall_clock.h" namespace Core::HostTiming { /// A callback that may be scheduled for a particular core timing event. using TimedCallback = std::function; -using sys_time_point = std::chrono::time_point; /// Contains the characteristics of a particular event. struct EventType { @@ -112,7 +112,7 @@ private: static void ThreadEntry(CoreTiming& instance); void Advance(); - sys_time_point start_time; + std::unique_ptr clock; u64 global_timer = 0; diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp index ca9c8e50a..3d0532d02 100644 --- a/src/tests/core/host_timing.cpp +++ b/src/tests/core/host_timing.cpp @@ -17,7 +17,7 @@ // Numbers are chosen randomly to make sure the correct one is given. static constexpr std::array CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals -static constexpr std::array calls_order{{2,0,1,4,3}}; +static constexpr std::array calls_order{{2, 0, 1, 4, 3}}; static std::array delays{}; static std::bitset callbacks_ran_flags; @@ -52,16 +52,11 @@ TEST_CASE("HostTiming[BasicOrder]", "[core]") { auto& core_timing = guard.core_timing; std::vector> events; events.resize(5); - events[0] = - Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>); - events[1] = - Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>); - events[2] = - Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>); - events[3] = - Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>); - events[4] = - Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>); + events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>); + events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>); + events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>); + events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>); + events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>); expected_callback = 0; @@ -70,14 +65,15 @@ TEST_CASE("HostTiming[BasicOrder]", "[core]") { u64 one_micro = 1000U; for (std::size_t i = 0; i < events.size(); i++) { u64 order = calls_order[i]; - core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]); + core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); } /// test pause REQUIRE(callbacks_ran_flags.none()); core_timing.Pause(false); // No need to sync - while (core_timing.HasPendingEvents()); + while (core_timing.HasPendingEvents()) + ; REQUIRE(callbacks_ran_flags.all()); @@ -106,16 +102,11 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") { auto& core_timing = guard.core_timing; std::vector> events; events.resize(5); - events[0] = - Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>); - events[1] = - Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>); - events[2] = - Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>); - events[3] = - Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>); - events[4] = - Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>); + events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>); + events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>); + events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>); + events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>); + events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>); core_timing.SyncPause(true); core_timing.SyncPause(false); @@ -126,13 +117,14 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") { u64 one_micro = 1000U; for (std::size_t i = 0; i < events.size(); i++) { u64 order = calls_order[i]; - core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]); + core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); } u64 end = core_timing.GetGlobalTimeNs().count(); const double scheduling_time = static_cast(end - start); const double timer_time = static_cast(TestTimerSpeed(core_timing)); - while (core_timing.HasPendingEvents()); + while (core_timing.HasPendingEvents()) + ; REQUIRE(callbacks_ran_flags.all()); @@ -146,5 +138,6 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") { const double micro = scheduling_time / 1000.0f; const double mili = micro / 1000.0f; printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili); - printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, timer_time / 1000000.f); + printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, + timer_time / 1000000.f); } -- cgit v1.2.3 From e3524d114246a9221c766bdf1992777b208cbd67 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Feb 2020 11:20:40 -0400 Subject: Common: Refactor & Document Wall clock. --- src/common/uint128.cpp | 22 +++++++++++++++++++ src/common/uint128.h | 3 +++ src/common/wall_clock.cpp | 13 +++++------- src/common/wall_clock.h | 13 +++++++++++- src/common/x64/native_clock.cpp | 47 ++++++----------------------------------- src/core/host_timing.cpp | 3 +-- 6 files changed, 50 insertions(+), 51 deletions(-) (limited to 'src/common') diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp index 32bf56730..7e77588db 100644 --- a/src/common/uint128.cpp +++ b/src/common/uint128.cpp @@ -6,12 +6,34 @@ #include #pragma intrinsic(_umul128) +#pragma intrinsic(_udiv128) #endif #include #include "common/uint128.h" namespace Common { +#ifdef _MSC_VER + +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { + u128 r{}; + r[0] = _umul128(a, b, &r[1]); + u64 remainder; + return _udiv128(r[1], r[0], d, &remainder); +} + +#else + +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { + const u64 diva = a / d; + const u64 moda = a % d; + const u64 divb = b / d; + const u64 modb = b % d; + return diva * b + moda * divb + moda * modb / d; +} + +#endif + u128 Multiply64Into128(u64 a, u64 b) { u128 result; #ifdef _MSC_VER diff --git a/src/common/uint128.h b/src/common/uint128.h index a3be2a2cb..503cd2d0c 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -9,6 +9,9 @@ namespace Common { +// This function multiplies 2 u64 values and divides it by a u64 value. +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d); + // This function multiplies 2 u64 values and produces a u128 value; u128 Multiply64Into128(u64 a, u64 b); diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index eabbba9da..8f5e17fa4 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -58,7 +58,7 @@ private: #ifdef ARCHITECTURE_x86_64 -WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); u64 rtsc_frequency = 0; if (caps.invariant_tsc) { @@ -70,19 +70,16 @@ WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_cloc } } if (rtsc_frequency == 0) { - return static_cast( - new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency)); + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); } else { - return static_cast( - new X64::NativeClock(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency)); + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency); } } #else -WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { - return static_cast( - new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency)); +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); } #endif diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 6f763d74b..fc34429bb 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "common/common_types.h" @@ -12,10 +13,20 @@ namespace Common { class WallClock { public: + + /// Returns current wall time in nanoseconds virtual std::chrono::nanoseconds GetTimeNS() = 0; + + /// Returns current wall time in microseconds virtual std::chrono::microseconds GetTimeUS() = 0; + + /// Returns current wall time in milliseconds virtual std::chrono::milliseconds GetTimeMS() = 0; + + /// Returns current wall time in emulated clock cycles virtual u64 GetClockCycles() = 0; + + /// Returns current wall time in emulated cpu cycles virtual u64 GetCPUCycles() = 0; /// Tells if the wall clock, uses the host CPU's hardware clock @@ -35,6 +46,6 @@ private: bool is_native; }; -WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency); +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency); } // namespace Common diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index c799111fd..26d4d0ba6 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -11,44 +11,11 @@ #include #endif +#include "common/uint128.h" #include "common/x64/native_clock.h" namespace Common { -#ifdef _MSC_VER - -namespace { - -struct uint128 { - u64 low; - u64 high; -}; - -u64 umuldiv64(u64 a, u64 b, u64 d) { - uint128 r{}; - r.low = _umul128(a, b, &r.high); - u64 remainder; - return _udiv128(r.high, r.low, d, &remainder); -} - -} // namespace - -#else - -namespace { - -u64 umuldiv64(u64 a, u64 b, u64 d) { - const u64 diva = a / d; - const u64 moda = a % d; - const u64 divb = b / d; - const u64 modb = b % d; - return diva * b + moda * divb + moda * modb / d; -} - -} // namespace - -#endif - u64 EstimateRDTSCFrequency() { const auto milli_10 = std::chrono::milliseconds{10}; // get current time @@ -70,7 +37,7 @@ u64 EstimateRDTSCFrequency() { const u64 timer_diff = std::chrono::duration_cast(endTime - startTime).count(); const u64 tsc_diff = tscEnd - tscStart; - const u64 tsc_freq = umuldiv64(tsc_diff, 1000000000ULL, timer_diff); + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); return tsc_freq; } @@ -100,27 +67,27 @@ u64 NativeClock::GetRTSC() { std::chrono::nanoseconds NativeClock::GetTimeNS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)}; + return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; } std::chrono::microseconds NativeClock::GetTimeUS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)}; + return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; } std::chrono::milliseconds NativeClock::GetTimeMS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)}; + return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; } u64 NativeClock::GetClockCycles() { const u64 rtsc_value = GetRTSC(); - return umuldiv64(rtsc_value, emulated_clock_frequency, rtsc_frequency); + return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); } u64 NativeClock::GetCPUCycles() { const u64 rtsc_value = GetRTSC(); - return umuldiv64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); + return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); } } // namespace X64 diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp index ef9977b76..4ccf7c6c1 100644 --- a/src/core/host_timing.cpp +++ b/src/core/host_timing.cpp @@ -36,8 +36,7 @@ struct CoreTiming::Event { }; CoreTiming::CoreTiming() { - Common::WallClock* wall = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ); - clock = std::unique_ptr(wall); + clock = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ); } CoreTiming::~CoreTiming() = default; -- cgit v1.2.3 From 03e4f5dac436fe361834e6b9918983e9c4787acb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Feb 2020 13:18:23 -0400 Subject: Common: Correct fcontext fibers. --- src/common/fiber.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index a88a30ced..e91d86dbe 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -12,6 +12,7 @@ namespace Common { + #ifdef _MSC_VER struct Fiber::FiberImpl { @@ -82,7 +83,6 @@ std::shared_ptr Fiber::ThreadToFiber() { } #else - constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB struct alignas(64) Fiber::FiberImpl { @@ -108,9 +108,8 @@ void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} { impl = std::make_unique(); - auto start_func = std::bind(&Fiber::start, this); - impl->context = - boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), &start_func); + impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), + FiberStartFunc); } Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} { @@ -139,7 +138,7 @@ void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { ASSERT_MSG(to != nullptr, "Next fiber is null!"); to->guard.lock(); to->previous_fiber = from; - auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr); + auto transfer = boost::context::detail::jump_fcontext(to->impl->context, nullptr); auto previous_fiber = from->previous_fiber; ASSERT(previous_fiber != nullptr); previous_fiber->impl->context = transfer.fctx; -- cgit v1.2.3 From 1bd706344e2381e11245b2f0bdc291429e46c634 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Feb 2020 13:33:13 -0400 Subject: Common/Tests: Clang Format. --- src/common/fiber.cpp | 21 ++++++++++----------- src/common/fiber.h | 2 +- src/common/wall_clock.cpp | 12 ++++++++---- src/common/wall_clock.h | 4 ++-- src/core/host_timing.cpp | 8 +++++--- src/core/host_timing.h | 2 +- src/tests/common/fibers.cpp | 23 ++++++++++++++--------- 7 files changed, 41 insertions(+), 31 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index e91d86dbe..a46be73c1 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -12,7 +12,6 @@ namespace Common { - #ifdef _MSC_VER struct Fiber::FiberImpl { @@ -27,14 +26,14 @@ void Fiber::start() { UNREACHABLE(); } -void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) -{ - auto fiber = static_cast(fiber_parameter); - fiber->start(); +void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) { + auto fiber = static_cast(fiber_parameter); + fiber->start(); } Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) - : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} { + : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, + previous_fiber{} { impl = std::make_unique(); impl->handle = CreateFiber(0, &FiberStartFunc, this); } @@ -99,14 +98,14 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) { UNREACHABLE(); } -void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) -{ - auto fiber = static_cast(transfer.data); - fiber->start(transfer); +void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) { + auto fiber = static_cast(transfer.data); + fiber->start(transfer); } Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) - : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} { + : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, + previous_fiber{} { impl = std::make_unique(); impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), FiberStartFunc); diff --git a/src/common/fiber.h b/src/common/fiber.h index 89a01fdd8..b530bf4d2 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -12,7 +12,7 @@ #ifndef _MSC_VER namespace boost::context::detail { - struct transfer_t; +struct transfer_t; } #endif diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 8f5e17fa4..e6161c72c 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -58,7 +58,8 @@ private: #ifdef ARCHITECTURE_x86_64 -std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, + u32 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); u64 rtsc_frequency = 0; if (caps.invariant_tsc) { @@ -70,15 +71,18 @@ std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u } } if (rtsc_frequency == 0) { - return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); + return std::make_unique(emulated_cpu_frequency, + emulated_clock_frequency); } else { - return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency); + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency, + rtsc_frequency); } } #else -std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, + u32 emulated_clock_frequency) { return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); } diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index fc34429bb..ed284cf50 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -13,7 +13,6 @@ namespace Common { class WallClock { public: - /// Returns current wall time in nanoseconds virtual std::chrono::nanoseconds GetTimeNS() = 0; @@ -46,6 +45,7 @@ private: bool is_native; }; -std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency); +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, + u32 emulated_clock_frequency); } // namespace Common diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp index 4ccf7c6c1..c734a118e 100644 --- a/src/core/host_timing.cpp +++ b/src/core/host_timing.cpp @@ -72,7 +72,8 @@ void CoreTiming::SyncPause(bool is_paused) { } Pause(is_paused); event.Set(); - while (paused_set != is_paused); + while (paused_set != is_paused) + ; } bool CoreTiming::IsRunning() { @@ -158,7 +159,8 @@ void CoreTiming::Advance() { } if (!event_queue.empty()) { - std::chrono::nanoseconds next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer); + std::chrono::nanoseconds next_time = + std::chrono::nanoseconds(event_queue.front().time - global_timer); basic_lock.unlock(); event.WaitFor(next_time); } else { @@ -181,4 +183,4 @@ std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { return clock->GetTimeUS(); } -} // namespace Core::Timing +} // namespace Core::HostTiming diff --git a/src/core/host_timing.h b/src/core/host_timing.h index f04a150ee..15a150904 100644 --- a/src/core/host_timing.h +++ b/src/core/host_timing.h @@ -145,4 +145,4 @@ private: /// std::shared_ptr CreateEvent(std::string name, TimedCallback&& callback); -} // namespace Core::Timing +} // namespace Core::HostTiming diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index 358393a19..d63194dd4 100644 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp @@ -92,7 +92,8 @@ public: void DoWork1() { trap2 = false; - while (trap.load()); + while (trap.load()) + ; for (u32 i = 0; i < 12000; i++) { value1 += i; } @@ -105,7 +106,8 @@ public: } void DoWork2() { - while (trap2.load()); + while (trap2.load()) + ; value2 = 2000; trap = false; Fiber::YieldTo(fiber2, fiber1); @@ -197,9 +199,12 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) { TEST_CASE("Fibers::InterExchange", "[common]") { TestControl2 test_control{}; test_control.thread_fibers.resize(2, nullptr); - test_control.fiber1 = std::make_shared(std::function{WorkControl2_1}, &test_control); - test_control.fiber2 = std::make_shared(std::function{WorkControl2_2}, &test_control); - test_control.fiber3 = std::make_shared(std::function{WorkControl2_3}, &test_control); + test_control.fiber1 = + std::make_shared(std::function{WorkControl2_1}, &test_control); + test_control.fiber2 = + std::make_shared(std::function{WorkControl2_2}, &test_control); + test_control.fiber3 = + std::make_shared(std::function{WorkControl2_3}, &test_control); std::thread thread1(ThreadStart2_1, 0, std::ref(test_control)); std::thread thread2(ThreadStart2_2, 1, std::ref(test_control)); thread1.join(); @@ -291,8 +296,10 @@ static void ThreadStart3(u32 id, TestControl3& test_control) { TEST_CASE("Fibers::StartRace", "[common]") { TestControl3 test_control{}; test_control.thread_fibers.resize(2, nullptr); - test_control.fiber1 = std::make_shared(std::function{WorkControl3_1}, &test_control); - test_control.fiber2 = std::make_shared(std::function{WorkControl3_2}, &test_control); + test_control.fiber1 = + std::make_shared(std::function{WorkControl3_1}, &test_control); + test_control.fiber2 = + std::make_shared(std::function{WorkControl3_2}, &test_control); std::thread thread1(ThreadStart3, 0, std::ref(test_control)); std::thread thread2(ThreadStart3, 1, std::ref(test_control)); thread1.join(); @@ -302,6 +309,4 @@ TEST_CASE("Fibers::StartRace", "[common]") { REQUIRE(test_control.value3 == 1); } - - } // namespace Common -- cgit v1.2.3 From 3398f701eeac63f3cfcf193f3e9c1ee2f06edb08 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Feb 2020 14:21:23 -0400 Subject: Common: Make MinGW build use Windows Fibers instead of fcontext_t --- src/common/fiber.cpp | 4 ++-- src/common/fiber.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index a46be73c1..050c93acb 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -4,7 +4,7 @@ #include "common/assert.h" #include "common/fiber.h" -#ifdef _MSC_VER +#if defined(_WIN32) || defined(WIN32) #include #else #include @@ -12,7 +12,7 @@ namespace Common { -#ifdef _MSC_VER +#if defined(_WIN32) || defined(WIN32) struct Fiber::FiberImpl { LPVOID handle = nullptr; diff --git a/src/common/fiber.h b/src/common/fiber.h index b530bf4d2..598fe7daa 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -10,7 +10,7 @@ #include "common/common_types.h" #include "common/spin_lock.h" -#ifndef _MSC_VER +#if !defined(_WIN32) && !defined(WIN32) namespace boost::context::detail { struct transfer_t; } @@ -57,7 +57,7 @@ public: private: Fiber(); -#ifdef _MSC_VER +#if defined(_WIN32) || defined(WIN32) void start(); static void FiberStartFunc(void* fiber_parameter); #else -- cgit v1.2.3 From 1f7dd36499786d373b143a4437d4c32e077a32aa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Feb 2020 14:45:08 -0400 Subject: Common/Tests: Address Feedback --- src/common/fiber.cpp | 5 ++--- src/common/fiber.h | 8 ++++---- src/common/spin_lock.cpp | 3 ++- src/core/core_timing_util.cpp | 14 ++++++++++++-- src/core/core_timing_util.h | 2 ++ src/core/host_timing.cpp | 4 ++-- src/core/host_timing.h | 6 +++--- src/tests/common/fibers.cpp | 20 ++++++++++---------- src/tests/core/host_timing.cpp | 28 ++++++++++++++-------------- 9 files changed, 51 insertions(+), 39 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index 050c93acb..1220eddf0 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -32,13 +32,12 @@ void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) { } Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) - : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, - previous_fiber{} { + : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} { impl = std::make_unique(); impl->handle = CreateFiber(0, &FiberStartFunc, this); } -Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} { +Fiber::Fiber() { impl = std::make_unique(); } diff --git a/src/common/fiber.h b/src/common/fiber.h index 598fe7daa..7e3b130a4 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -67,10 +67,10 @@ private: struct FiberImpl; - SpinLock guard; - std::function entry_point; - void* start_parameter; - std::shared_ptr previous_fiber; + SpinLock guard{}; + std::function entry_point{}; + void* start_parameter{}; + std::shared_ptr previous_fiber{}; std::unique_ptr impl; bool is_thread_fiber{}; }; diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp index 82a1d39ff..c7b46aac6 100644 --- a/src/common/spin_lock.cpp +++ b/src/common/spin_lock.cpp @@ -35,8 +35,9 @@ void thread_pause() { namespace Common { void SpinLock::lock() { - while (lck.test_and_set(std::memory_order_acquire)) + while (lck.test_and_set(std::memory_order_acquire)) { thread_pause(); + } } void SpinLock::unlock() { diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index f42666b4d..be34b26fe 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -49,9 +49,19 @@ s64 nsToCycles(std::chrono::nanoseconds ns) { return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000; } +u64 msToClockCycles(std::chrono::milliseconds ns) { + const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); + return Common::Divide128On32(temp, 1000).first; +} + +u64 usToClockCycles(std::chrono::microseconds ns) { + const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); + return Common::Divide128On32(temp, 1000000).first; +} + u64 nsToClockCycles(std::chrono::nanoseconds ns) { - const u128 temporal = Common::Multiply64Into128(ns.count(), CNTFREQ); - return Common::Divide128On32(temporal, 1000000000).first; + const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); + return Common::Divide128On32(temp, 1000000000).first; } u64 CpuCyclesToClockCycles(u64 ticks) { diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index 65fb7368b..b3c58447d 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -13,6 +13,8 @@ namespace Core::Timing { s64 msToCycles(std::chrono::milliseconds ms); s64 usToCycles(std::chrono::microseconds us); s64 nsToCycles(std::chrono::nanoseconds ns); +u64 msToClockCycles(std::chrono::milliseconds ns); +u64 usToClockCycles(std::chrono::microseconds ns); u64 nsToClockCycles(std::chrono::nanoseconds ns); inline std::chrono::milliseconds CyclesToMs(s64 cycles) { diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp index c734a118e..be80d9f8e 100644 --- a/src/core/host_timing.cpp +++ b/src/core/host_timing.cpp @@ -76,11 +76,11 @@ void CoreTiming::SyncPause(bool is_paused) { ; } -bool CoreTiming::IsRunning() { +bool CoreTiming::IsRunning() const { return !paused_set; } -bool CoreTiming::HasPendingEvents() { +bool CoreTiming::HasPendingEvents() const { return !(wait_set && event_queue.empty()); } diff --git a/src/core/host_timing.h b/src/core/host_timing.h index 15a150904..679fcf491 100644 --- a/src/core/host_timing.h +++ b/src/core/host_timing.h @@ -72,15 +72,15 @@ public: void SyncPause(bool is_paused); /// Checks if core timing is running. - bool IsRunning(); + bool IsRunning() const; /// Checks if the timer thread has started. - bool HasStarted() { + bool HasStarted() const { return has_started; } /// Checks if there are any pending time events. - bool HasPendingEvents(); + bool HasPendingEvents() const; /// Schedules an event in core timing void ScheduleEvent(s64 ns_into_future, const std::shared_ptr& event_type, diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index d63194dd4..0d3d5153d 100644 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp @@ -34,7 +34,7 @@ public: }; static void WorkControl1(void* control) { - TestControl1* test_control = static_cast(control); + auto* test_control = static_cast(control); test_control->DoWork(); } @@ -70,8 +70,8 @@ static void ThreadStart1(u32 id, TestControl1& test_control) { TEST_CASE("Fibers::Setup", "[common]") { constexpr u32 num_threads = 7; TestControl1 test_control{}; - test_control.thread_fibers.resize(num_threads, nullptr); - test_control.work_fibers.resize(num_threads, nullptr); + test_control.thread_fibers.resize(num_threads); + test_control.work_fibers.resize(num_threads); test_control.items.resize(num_threads, 0); test_control.results.resize(num_threads, 0); std::vector threads; @@ -153,17 +153,17 @@ public: }; static void WorkControl2_1(void* control) { - TestControl2* test_control = static_cast(control); + auto* test_control = static_cast(control); test_control->DoWork1(); } static void WorkControl2_2(void* control) { - TestControl2* test_control = static_cast(control); + auto* test_control = static_cast(control); test_control->DoWork2(); } static void WorkControl2_3(void* control) { - TestControl2* test_control = static_cast(control); + auto* test_control = static_cast(control); test_control->DoWork3(); } @@ -198,7 +198,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) { */ TEST_CASE("Fibers::InterExchange", "[common]") { TestControl2 test_control{}; - test_control.thread_fibers.resize(2, nullptr); + test_control.thread_fibers.resize(2); test_control.fiber1 = std::make_shared(std::function{WorkControl2_1}, &test_control); test_control.fiber2 = @@ -261,12 +261,12 @@ public: }; static void WorkControl3_1(void* control) { - TestControl3* test_control = static_cast(control); + auto* test_control = static_cast(control); test_control->DoWork1(); } static void WorkControl3_2(void* control) { - TestControl3* test_control = static_cast(control); + auto* test_control = static_cast(control); test_control->DoWork2(); } @@ -295,7 +295,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) { */ TEST_CASE("Fibers::StartRace", "[common]") { TestControl3 test_control{}; - test_control.thread_fibers.resize(2, nullptr); + test_control.thread_fibers.resize(2); test_control.fiber1 = std::make_shared(std::function{WorkControl3_1}, &test_control); test_control.fiber2 = diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp index 3d0532d02..ed060be55 100644 --- a/src/tests/core/host_timing.cpp +++ b/src/tests/core/host_timing.cpp @@ -50,13 +50,13 @@ struct ScopeInit final { TEST_CASE("HostTiming[BasicOrder]", "[core]") { ScopeInit guard; auto& core_timing = guard.core_timing; - std::vector> events; - events.resize(5); - events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>); - events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>); - events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>); - events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>); - events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>); + std::vector> events{ + Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>), + Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>), + Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>), + Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>), + Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>), + }; expected_callback = 0; @@ -100,13 +100,13 @@ u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) { TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") { ScopeInit guard; auto& core_timing = guard.core_timing; - std::vector> events; - events.resize(5); - events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>); - events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>); - events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>); - events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>); - events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>); + std::vector> events{ + Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>), + Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>), + Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>), + Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>), + Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>), + }; core_timing.SyncPause(true); core_timing.SyncPause(false); -- cgit v1.2.3 From 49a7e0984a1210832b8be24433a95711c7ce029b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Feb 2020 15:02:04 -0400 Subject: Core/HostTiming: Allow events to be advanced manually. --- src/common/fiber.cpp | 2 +- src/common/wall_clock.cpp | 9 +++---- src/core/host_timing.cpp | 61 ++++++++++++++++++++++++++++------------------- src/core/host_timing.h | 6 ++++- 4 files changed, 47 insertions(+), 31 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index 1220eddf0..e9c0946b6 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -110,7 +110,7 @@ Fiber::Fiber(std::function&& entry_point_func, void* start_paramete FiberStartFunc); } -Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} { +Fiber::Fiber() { impl = std::make_unique(); } diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index e6161c72c..d4d35f4e7 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -42,14 +42,15 @@ public: u64 GetClockCycles() override { std::chrono::nanoseconds time_now = GetTimeNS(); - const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); - return Common::Divide128On32(temporal, 1000000000).first; + const u128 temporary = + Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); + return Common::Divide128On32(temporary, 1000000000).first; } u64 GetCPUCycles() override { std::chrono::nanoseconds time_now = GetTimeNS(); - const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); - return Common::Divide128On32(temporal, 1000000000).first; + const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); + return Common::Divide128On32(temporary, 1000000000).first; } private: diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp index be80d9f8e..5d35a96b1 100644 --- a/src/core/host_timing.cpp +++ b/src/core/host_timing.cpp @@ -42,7 +42,7 @@ CoreTiming::CoreTiming() { CoreTiming::~CoreTiming() = default; void CoreTiming::ThreadEntry(CoreTiming& instance) { - instance.Advance(); + instance.ThreadLoop(); } void CoreTiming::Initialize() { @@ -137,38 +137,49 @@ void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { basic_lock.unlock(); } -void CoreTiming::Advance() { - has_started = true; - while (!shutting_down) { - while (!paused) { - paused_set = false; - basic_lock.lock(); - global_timer = GetGlobalTimeNs().count(); +std::optional CoreTiming::Advance() { + advance_lock.lock(); + basic_lock.lock(); + global_timer = GetGlobalTimeNs().count(); - while (!event_queue.empty() && event_queue.front().time <= global_timer) { - Event evt = std::move(event_queue.front()); - std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); - event_queue.pop_back(); - basic_lock.unlock(); + while (!event_queue.empty() && event_queue.front().time <= global_timer) { + Event evt = std::move(event_queue.front()); + std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + event_queue.pop_back(); + basic_lock.unlock(); - if (auto event_type{evt.type.lock()}) { - event_type->callback(evt.userdata, global_timer - evt.time); - } + if (auto event_type{evt.type.lock()}) { + event_type->callback(evt.userdata, global_timer - evt.time); + } - basic_lock.lock(); - } + basic_lock.lock(); + } - if (!event_queue.empty()) { - std::chrono::nanoseconds next_time = - std::chrono::nanoseconds(event_queue.front().time - global_timer); - basic_lock.unlock(); - event.WaitFor(next_time); + if (!event_queue.empty()) { + const u64 next_time = event_queue.front().time - global_timer; + basic_lock.unlock(); + advance_lock.unlock(); + return next_time; + } else { + basic_lock.unlock(); + advance_lock.unlock(); + return std::nullopt; + } +} + +void CoreTiming::ThreadLoop() { + has_started = true; + while (!shutting_down) { + while (!paused) { + paused_set = false; + const auto next_time = Advance(); + if (next_time) { + std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time); + event.WaitFor(next_time_ns); } else { - basic_lock.unlock(); wait_set = true; event.Wait(); } - wait_set = false; } paused_set = true; diff --git a/src/core/host_timing.h b/src/core/host_timing.h index 679fcf491..cd44b308c 100644 --- a/src/core/host_timing.h +++ b/src/core/host_timing.h @@ -103,6 +103,9 @@ public: /// Returns current time in nanoseconds. std::chrono::nanoseconds GetGlobalTimeNs() const; + /// Checks for events manually and returns time in nanoseconds for next event, threadsafe. + std::optional Advance(); + private: struct Event; @@ -110,7 +113,7 @@ private: void ClearPendingEvents(); static void ThreadEntry(CoreTiming& instance); - void Advance(); + void ThreadLoop(); std::unique_ptr clock; @@ -128,6 +131,7 @@ private: std::shared_ptr ev_lost; Common::Event event{}; Common::SpinLock basic_lock{}; + Common::SpinLock advance_lock{}; std::unique_ptr timer_thread; std::atomic paused{}; std::atomic paused_set{}; -- cgit v1.2.3 From 7d2b1a6ec4a1c0daea0bac83a83c85f263609224 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 26 Feb 2020 14:39:27 -0400 Subject: Common/Fiber: Correct f_context based Fibers. --- src/common/fiber.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index e9c0946b6..3ef820c62 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -81,10 +81,10 @@ std::shared_ptr Fiber::ThreadToFiber() { } #else -constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB +constexpr std::size_t default_stack_size = 1024 * 1024; // 4MB -struct alignas(64) Fiber::FiberImpl { - std::array stack; +struct Fiber::FiberImpl { + alignas(64) std::array stack; boost::context::detail::fcontext_t context; }; @@ -106,8 +106,10 @@ Fiber::Fiber(std::function&& entry_point_func, void* start_paramete : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} { impl = std::make_unique(); - impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), - FiberStartFunc); + void* stack_start = + static_cast(static_cast(impl->stack.data()) + default_stack_size); + impl->context = + boost::context::detail::make_fcontext(stack_start, impl->stack.size(), FiberStartFunc); } Fiber::Fiber() { @@ -136,7 +138,7 @@ void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { ASSERT_MSG(to != nullptr, "Next fiber is null!"); to->guard.lock(); to->previous_fiber = from; - auto transfer = boost::context::detail::jump_fcontext(to->impl->context, nullptr); + auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get()); auto previous_fiber = from->previous_fiber; ASSERT(previous_fiber != nullptr); previous_fiber->impl->context = transfer.fctx; -- cgit v1.2.3 From 41013381d69f952f78b85de3ce226c1499d889b6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 26 Feb 2020 17:34:23 -0400 Subject: Common/Fiber: Additional corrections to f_context. --- src/common/fiber.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index 3ef820c62..e4ecc73df 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -81,7 +81,7 @@ std::shared_ptr Fiber::ThreadToFiber() { } #else -constexpr std::size_t default_stack_size = 1024 * 1024; // 4MB +constexpr std::size_t default_stack_size = 1024 * 1024; // 1MB struct Fiber::FiberImpl { alignas(64) std::array stack; @@ -106,10 +106,10 @@ Fiber::Fiber(std::function&& entry_point_func, void* start_paramete : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} { impl = std::make_unique(); - void* stack_start = - static_cast(static_cast(impl->stack.data()) + default_stack_size); + u8* stack_limit = impl->stack.data(); + u8* stack_base = stack_limit + default_stack_size; impl->context = - boost::context::detail::make_fcontext(stack_start, impl->stack.size(), FiberStartFunc); + boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc); } Fiber::Fiber() { -- cgit v1.2.3 From 137d862d9b275209b3d62a413396a15e9e14b4b4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 27 Feb 2020 16:32:47 -0400 Subject: Common/Fiber: Implement Rewinding. --- src/common/fiber.cpp | 32 +++++++++++++++++++++++++++++-- src/common/fiber.h | 8 ++++++++ src/tests/common/fibers.cpp | 46 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index e4ecc73df..f61479e13 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -12,10 +12,13 @@ namespace Common { +constexpr std::size_t default_stack_size = 256 * 1024; // 256kb + #if defined(_WIN32) || defined(WIN32) struct Fiber::FiberImpl { LPVOID handle = nullptr; + LPVOID rewind_handle = nullptr; }; void Fiber::start() { @@ -26,15 +29,29 @@ void Fiber::start() { UNREACHABLE(); } +void Fiber::onRewind() { + ASSERT(impl->handle != nullptr); + DeleteFiber(impl->handle); + impl->handle = impl->rewind_handle; + impl->rewind_handle = nullptr; + rewind_point(rewind_parameter); + UNREACHABLE(); +} + void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) { auto fiber = static_cast(fiber_parameter); fiber->start(); } +void __stdcall Fiber::RewindStartFunc(void* fiber_parameter) { + auto fiber = static_cast(fiber_parameter); + fiber->onRewind(); +} + Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} { impl = std::make_unique(); - impl->handle = CreateFiber(0, &FiberStartFunc, this); + impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this); } Fiber::Fiber() { @@ -60,6 +77,18 @@ void Fiber::Exit() { guard.unlock(); } +void Fiber::SetRewindPoint(std::function&& rewind_func, void* start_parameter) { + rewind_point = std::move(rewind_func); + rewind_parameter = start_parameter; +} + +void Fiber::Rewind() { + ASSERT(rewind_point); + ASSERT(impl->rewind_handle == nullptr); + impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this); + SwitchToFiber(impl->rewind_handle); +} + void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); ASSERT_MSG(to != nullptr, "Next fiber is null!"); @@ -81,7 +110,6 @@ std::shared_ptr Fiber::ThreadToFiber() { } #else -constexpr std::size_t default_stack_size = 1024 * 1024; // 1MB struct Fiber::FiberImpl { alignas(64) std::array stack; diff --git a/src/common/fiber.h b/src/common/fiber.h index 7e3b130a4..a710df257 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -46,6 +46,10 @@ public: static void YieldTo(std::shared_ptr from, std::shared_ptr to); static std::shared_ptr ThreadToFiber(); + void SetRewindPoint(std::function&& rewind_func, void* start_parameter); + + void Rewind(); + /// Only call from main thread's fiber void Exit(); @@ -58,8 +62,10 @@ private: Fiber(); #if defined(_WIN32) || defined(WIN32) + void onRewind(); void start(); static void FiberStartFunc(void* fiber_parameter); + static void RewindStartFunc(void* fiber_parameter); #else void start(boost::context::detail::transfer_t& transfer); static void FiberStartFunc(boost::context::detail::transfer_t transfer); @@ -69,6 +75,8 @@ private: SpinLock guard{}; std::function entry_point{}; + std::function rewind_point{}; + void* rewind_parameter{}; void* start_parameter{}; std::shared_ptr previous_fiber{}; std::unique_ptr impl; diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index 0d3d5153d..12536b6d8 100644 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp @@ -309,4 +309,50 @@ TEST_CASE("Fibers::StartRace", "[common]") { REQUIRE(test_control.value3 == 1); } +class TestControl4; + +static void WorkControl4(void* control); + +class TestControl4 { +public: + TestControl4() { + fiber1 = std::make_shared(std::function{WorkControl4}, this); + goal_reached = false; + rewinded = false; + } + + void Execute() { + thread_fiber = Fiber::ThreadToFiber(); + Fiber::YieldTo(thread_fiber, fiber1); + thread_fiber->Exit(); + } + + void DoWork() { + fiber1->SetRewindPoint(std::function{WorkControl4}, this); + if (rewinded) { + goal_reached = true; + Fiber::YieldTo(fiber1, thread_fiber); + } + rewinded = true; + fiber1->Rewind(); + } + + std::shared_ptr fiber1; + std::shared_ptr thread_fiber; + bool goal_reached; + bool rewinded; +}; + +static void WorkControl4(void* control) { + auto* test_control = static_cast(control); + test_control->DoWork(); +} + +TEST_CASE("Fibers::Rewind", "[common]") { + TestControl4 test_control{}; + test_control.Execute(); + REQUIRE(test_control.goal_reached); + REQUIRE(test_control.rewinded); +} + } // namespace Common -- cgit v1.2.3 From 18f54f74862322d5a9360cbdc3541b6e3f15dce6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 6 Mar 2020 11:24:08 -0400 Subject: Common/Fiber: Document fiber interexchange. --- src/common/fiber.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/fiber.h b/src/common/fiber.h index a710df257..3bbd506b5 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -28,7 +28,10 @@ namespace Common { * to implement such patterns. This fiber class is 'threadsafe' only one fiber * can be running at a time and threads will be locked while trying to yield to * a running fiber until it yields. WARNING exchanging two running fibers between - * threads will cause a deadlock. + * threads will cause a deadlock. In order to prevent a deadlock, each thread should + * have an intermediary fiber, you switch to the intermediary fiber of the current + * thread and then from it switch to the expected fiber. This way you can exchange + * 2 fibers within 2 different threads. */ class Fiber { public: -- cgit v1.2.3 From 59ce6e6d06e5ce8628f96bb247a342dec356fe43 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 12 Mar 2020 20:10:51 -0400 Subject: Common/uint128: Correct MSVC Compilation in old versions. --- src/common/uint128.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/common') diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp index 7e77588db..16bf7c828 100644 --- a/src/common/uint128.cpp +++ b/src/common/uint128.cpp @@ -19,7 +19,11 @@ u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { u128 r{}; r[0] = _umul128(a, b, &r[1]); u64 remainder; +#if _MSC_VER < 1923 + return udiv128(r[1], r[0], d, &remainder); +#else return _udiv128(r[1], r[0], d, &remainder); +#endif } #else -- cgit v1.2.3 From b6655aa2e492e326e319b09e832c1612bf27acf4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 1 Apr 2020 09:19:10 -0400 Subject: Common/Fiber: Implement Rewind on Boost Context. --- src/common/fiber.cpp | 39 +++++++++++++++++++++++++++++++++++++-- src/common/fiber.h | 2 ++ 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index f61479e13..6ea314d75 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -113,7 +113,11 @@ std::shared_ptr Fiber::ThreadToFiber() { struct Fiber::FiberImpl { alignas(64) std::array stack; + u8* stack_limit; + alignas(64) std::array rewind_stack; + u8* rewind_stack_limit; boost::context::detail::fcontext_t context; + boost::context::detail::fcontext_t rewind_context; }; void Fiber::start(boost::context::detail::transfer_t& transfer) { @@ -125,21 +129,43 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) { UNREACHABLE(); } +void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transfer) { + ASSERT(impl->context != nullptr); + impl->context = impl->rewind_context; + impl->rewind_context = nullptr; + u8* tmp = impl->stack_limit; + impl->stack_limit = impl->rewind_stack_limit; + impl->rewind_stack_limit = tmp; + rewind_point(rewind_parameter); + UNREACHABLE(); +} + void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) { auto fiber = static_cast(transfer.data); fiber->start(transfer); } +void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) { + auto fiber = static_cast(transfer.data); + fiber->onRewind(transfer); +} + Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} { impl = std::make_unique(); - u8* stack_limit = impl->stack.data(); - u8* stack_base = stack_limit + default_stack_size; + impl->stack_limit = impl->stack.data(); + impl->rewind_stack_limit = impl->rewind_stack.data(); + u8* stack_base = impl->stack_limit + default_stack_size; impl->context = boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc); } +void Fiber::SetRewindPoint(std::function&& rewind_func, void* start_parameter) { + rewind_point = std::move(rewind_func); + rewind_parameter = start_parameter; +} + Fiber::Fiber() { impl = std::make_unique(); } @@ -161,6 +187,15 @@ void Fiber::Exit() { guard.unlock(); } +void Fiber::Rewind() { + ASSERT(rewind_point); + ASSERT(impl->rewind_context == nullptr); + u8* stack_base = impl->rewind_stack_limit + default_stack_size; + impl->rewind_context = + boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc); + boost::context::detail::jump_fcontext(impl->rewind_context, this); +} + void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); ASSERT_MSG(to != nullptr, "Next fiber is null!"); diff --git a/src/common/fiber.h b/src/common/fiber.h index 3bbd506b5..cab7bc4b5 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -70,8 +70,10 @@ private: static void FiberStartFunc(void* fiber_parameter); static void RewindStartFunc(void* fiber_parameter); #else + void onRewind(boost::context::detail::transfer_t& transfer); void start(boost::context::detail::transfer_t& transfer); static void FiberStartFunc(boost::context::detail::transfer_t transfer); + static void RewindStartFunc(boost::context::detail::transfer_t transfer); #endif struct FiberImpl; -- cgit v1.2.3 From e77ee67bfacf9a0d3b9e7cd164531a2be158adc9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 13 May 2020 13:49:36 -0400 Subject: Common/Fiber: Address Feedback and Correct Memory leaks. --- src/common/fiber.cpp | 58 +++++++++++++++++++++++++++++----------------------- src/common/fiber.h | 17 +++++++-------- 2 files changed, 41 insertions(+), 34 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index 6ea314d75..f97ad433b 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -21,7 +21,7 @@ struct Fiber::FiberImpl { LPVOID rewind_handle = nullptr; }; -void Fiber::start() { +void Fiber::Start() { ASSERT(previous_fiber != nullptr); previous_fiber->guard.unlock(); previous_fiber.reset(); @@ -29,7 +29,7 @@ void Fiber::start() { UNREACHABLE(); } -void Fiber::onRewind() { +void Fiber::OnRewind() { ASSERT(impl->handle != nullptr); DeleteFiber(impl->handle); impl->handle = impl->rewind_handle; @@ -38,14 +38,14 @@ void Fiber::onRewind() { UNREACHABLE(); } -void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) { +void Fiber::FiberStartFunc(void* fiber_parameter) { auto fiber = static_cast(fiber_parameter); - fiber->start(); + fiber->Start(); } -void __stdcall Fiber::RewindStartFunc(void* fiber_parameter) { +void Fiber::RewindStartFunc(void* fiber_parameter) { auto fiber = static_cast(fiber_parameter); - fiber->onRewind(); + fiber->OnRewind(); } Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) @@ -59,8 +59,11 @@ Fiber::Fiber() { } Fiber::~Fiber() { + if (released) { + return; + } // Make sure the Fiber is not being used - bool locked = guard.try_lock(); + const bool locked = guard.try_lock(); ASSERT_MSG(locked, "Destroying a fiber that's still running"); if (locked) { guard.unlock(); @@ -75,6 +78,7 @@ void Fiber::Exit() { } ConvertFiberToThread(); guard.unlock(); + released = true; } void Fiber::SetRewindPoint(std::function&& rewind_func, void* start_parameter) { @@ -89,22 +93,21 @@ void Fiber::Rewind() { SwitchToFiber(impl->rewind_handle); } -void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { +void Fiber::YieldTo(std::shared_ptr& from, std::shared_ptr& to) { ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); ASSERT_MSG(to != nullptr, "Next fiber is null!"); to->guard.lock(); to->previous_fiber = from; SwitchToFiber(to->impl->handle); - auto previous_fiber = from->previous_fiber; - ASSERT(previous_fiber != nullptr); - previous_fiber->guard.unlock(); - previous_fiber.reset(); + ASSERT(from->previous_fiber != nullptr); + from->previous_fiber->guard.unlock(); + from->previous_fiber.reset(); } std::shared_ptr Fiber::ThreadToFiber() { std::shared_ptr fiber = std::shared_ptr{new Fiber()}; fiber->guard.lock(); - fiber->impl->handle = ConvertThreadToFiber(NULL); + fiber->impl->handle = ConvertThreadToFiber(nullptr); fiber->is_thread_fiber = true; return fiber; } @@ -120,7 +123,7 @@ struct Fiber::FiberImpl { boost::context::detail::fcontext_t rewind_context; }; -void Fiber::start(boost::context::detail::transfer_t& transfer) { +void Fiber::Start(boost::context::detail::transfer_t& transfer) { ASSERT(previous_fiber != nullptr); previous_fiber->impl->context = transfer.fctx; previous_fiber->guard.unlock(); @@ -129,7 +132,7 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) { UNREACHABLE(); } -void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transfer) { +void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) { ASSERT(impl->context != nullptr); impl->context = impl->rewind_context; impl->rewind_context = nullptr; @@ -142,17 +145,16 @@ void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transf void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) { auto fiber = static_cast(transfer.data); - fiber->start(transfer); + fiber->Start(transfer); } void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) { auto fiber = static_cast(transfer.data); - fiber->onRewind(transfer); + fiber->OnRewind(transfer); } Fiber::Fiber(std::function&& entry_point_func, void* start_parameter) - : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, - previous_fiber{} { + : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} { impl = std::make_unique(); impl->stack_limit = impl->stack.data(); impl->rewind_stack_limit = impl->rewind_stack.data(); @@ -171,8 +173,11 @@ Fiber::Fiber() { } Fiber::~Fiber() { + if (released) { + return; + } // Make sure the Fiber is not being used - bool locked = guard.try_lock(); + const bool locked = guard.try_lock(); ASSERT_MSG(locked, "Destroying a fiber that's still running"); if (locked) { guard.unlock(); @@ -180,11 +185,13 @@ Fiber::~Fiber() { } void Fiber::Exit() { + ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber"); if (!is_thread_fiber) { return; } guard.unlock(); + released = true; } void Fiber::Rewind() { @@ -196,17 +203,16 @@ void Fiber::Rewind() { boost::context::detail::jump_fcontext(impl->rewind_context, this); } -void Fiber::YieldTo(std::shared_ptr from, std::shared_ptr to) { +void Fiber::YieldTo(std::shared_ptr& from, std::shared_ptr& to) { ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); ASSERT_MSG(to != nullptr, "Next fiber is null!"); to->guard.lock(); to->previous_fiber = from; auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get()); - auto previous_fiber = from->previous_fiber; - ASSERT(previous_fiber != nullptr); - previous_fiber->impl->context = transfer.fctx; - previous_fiber->guard.unlock(); - previous_fiber.reset(); + ASSERT(from->previous_fiber != nullptr); + from->previous_fiber->impl->context = transfer.fctx; + from->previous_fiber->guard.unlock(); + from->previous_fiber.reset(); } std::shared_ptr Fiber::ThreadToFiber() { diff --git a/src/common/fiber.h b/src/common/fiber.h index cab7bc4b5..dafc1100e 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -46,7 +46,7 @@ public: /// Yields control from Fiber 'from' to Fiber 'to' /// Fiber 'from' must be the currently running fiber. - static void YieldTo(std::shared_ptr from, std::shared_ptr to); + static void YieldTo(std::shared_ptr& from, std::shared_ptr& to); static std::shared_ptr ThreadToFiber(); void SetRewindPoint(std::function&& rewind_func, void* start_parameter); @@ -65,13 +65,13 @@ private: Fiber(); #if defined(_WIN32) || defined(WIN32) - void onRewind(); - void start(); + void OnRewind(); + void Start(); static void FiberStartFunc(void* fiber_parameter); static void RewindStartFunc(void* fiber_parameter); #else - void onRewind(boost::context::detail::transfer_t& transfer); - void start(boost::context::detail::transfer_t& transfer); + void OnRewind(boost::context::detail::transfer_t& transfer); + void Start(boost::context::detail::transfer_t& transfer); static void FiberStartFunc(boost::context::detail::transfer_t transfer); static void RewindStartFunc(boost::context::detail::transfer_t transfer); #endif @@ -79,13 +79,14 @@ private: struct FiberImpl; SpinLock guard{}; - std::function entry_point{}; - std::function rewind_point{}; + std::function entry_point; + std::function rewind_point; void* rewind_parameter{}; void* start_parameter{}; - std::shared_ptr previous_fiber{}; + std::shared_ptr previous_fiber; std::unique_ptr impl; bool is_thread_fiber{}; + bool released{}; }; } // namespace Common -- cgit v1.2.3 From d6474b4aca7f054d00df350c716709475ef0f49b Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 16 May 2020 07:24:57 -0400 Subject: common/cpu_detect: Add AVX512 detection --- src/common/x64/cpu_detect.cpp | 5 +++++ src/common/x64/cpu_detect.h | 1 + 2 files changed, 6 insertions(+) (limited to 'src/common') diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..f35dcb498 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -110,6 +110,11 @@ static CPUCaps Detect() { caps.bmi1 = true; if ((cpu_id[1] >> 8) & 1) caps.bmi2 = true; + // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) + if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && + (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { + caps.avx512 = caps.avx2; + } } } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..7606c3f7b 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -19,6 +19,7 @@ struct CPUCaps { bool lzcnt; bool avx; bool avx2; + bool avx512; bool bmi1; bool bmi2; bool fma; -- cgit v1.2.3 From 97ba520434cceb42af3b17a59c731dd734e9108f Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 16 May 2020 07:25:13 -0400 Subject: common/telemetry: Add AVX512 to telemetry --- src/common/telemetry.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/common') diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 200c6489a..16d42facd 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) { fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); + fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); -- cgit v1.2.3 From 7b893c7963a57bf41f5dad7dd1709985971ce291 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Jun 2020 19:46:43 -0400 Subject: Common: Fix non-conan build --- src/common/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index aacea0ab7..3cc17d0e9 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -187,6 +187,7 @@ if(ARCHITECTURE_x86_64) endif() create_target_directory_groups(common) +find_package(Boost 1.71 COMPONENTS context headers REQUIRED) -target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) +target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile) target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) -- cgit v1.2.3 From e31425df3877636c098ec7426ebd2067920715cb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 24 Feb 2020 22:04:12 -0400 Subject: General: Recover Prometheus project from harddrive failure This commit: Implements CPU Interrupts, Replaces Cycle Timing for Host Timing, Reworks the Kernel's Scheduler, Introduce Idle State and Suspended State, Recreates the bootmanager, Initializes Multicore system. --- src/common/thread.cpp | 6 + src/core/CMakeLists.txt | 4 +- src/core/arm/arm_interface.h | 5 +- src/core/arm/cpu_interrupt_handler.cpp | 29 ++ src/core/arm/cpu_interrupt_handler.h | 39 ++ src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_32.h | 4 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 28 +- src/core/arm/dynarmic/arm_dynarmic_64.h | 4 +- src/core/arm/unicorn/arm_unicorn.cpp | 14 +- src/core/arm/unicorn/arm_unicorn.h | 3 +- src/core/core.cpp | 57 +-- src/core/core.h | 34 +- src/core/core_manager.cpp | 4 +- src/core/core_timing.cpp | 208 +++++------ src/core/core_timing.h | 108 +++--- src/core/cpu_manager.cpp | 194 ++++++++-- src/core/cpu_manager.h | 49 ++- src/core/hle/kernel/kernel.cpp | 84 ++++- src/core/hle/kernel/kernel.h | 19 + src/core/hle/kernel/physical_core.cpp | 37 +- src/core/hle/kernel/physical_core.h | 21 ++ src/core/hle/kernel/process.cpp | 17 +- src/core/hle/kernel/scheduler.cpp | 415 +++++++++++++++------ src/core/hle/kernel/scheduler.h | 94 +++-- src/core/hle/kernel/svc.cpp | 21 +- src/core/hle/kernel/thread.cpp | 232 +++++------- src/core/hle/kernel/thread.h | 81 +++- src/core/hle/kernel/time_manager.cpp | 2 +- src/core/hle/service/hid/controllers/debug_pad.cpp | 2 +- src/core/hle/service/hid/controllers/gesture.cpp | 2 +- src/core/hle/service/hid/controllers/keyboard.cpp | 2 +- src/core/hle/service/hid/controllers/mouse.cpp | 2 +- src/core/hle/service/hid/controllers/npad.cpp | 2 +- src/core/hle/service/hid/controllers/stubbed.cpp | 2 +- .../hle/service/hid/controllers/touchscreen.cpp | 4 +- src/core/hle/service/hid/controllers/xpad.cpp | 2 +- src/core/hle/service/hid/hid.cpp | 16 +- src/core/hle/service/hid/irs.cpp | 2 +- .../hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | 3 +- src/core/hle/service/nvflinger/nvflinger.cpp | 13 +- .../service/time/standard_steady_clock_core.cpp | 5 +- .../service/time/tick_based_steady_clock_core.cpp | 5 +- src/core/hle/service/time/time.cpp | 5 +- src/core/hle/service/time/time_sharedmemory.cpp | 3 +- src/core/memory.cpp | 11 +- src/core/memory.h | 2 +- src/core/memory/cheat_engine.cpp | 8 +- src/core/tools/freezer.cpp | 8 +- src/tests/CMakeLists.txt | 1 - src/tests/core/core_timing.cpp | 184 +++++---- src/video_core/gpu.cpp | 5 +- src/yuzu/bootmanager.cpp | 32 +- src/yuzu/bootmanager.h | 7 + src/yuzu/debugger/wait_tree.cpp | 6 +- src/yuzu_cmd/yuzu.cpp | 2 +- src/yuzu_tester/yuzu.cpp | 2 +- 57 files changed, 1341 insertions(+), 816 deletions(-) create mode 100644 src/core/arm/cpu_interrupt_handler.cpp create mode 100644 src/core/arm/cpu_interrupt_handler.h (limited to 'src/common') diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 0cd2d10bf..c9684aed9 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -70,6 +70,12 @@ void SetCurrentThreadName(const char* name) { } #endif +#if defined(_WIN32) +void SetCurrentThreadName(const char* name) { + // Do Nothing on MingW +} +#endif + #endif } // namespace Common diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index efbad628f..552094ddb 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -7,6 +7,8 @@ endif() add_library(core STATIC arm/arm_interface.h arm/arm_interface.cpp + arm/cpu_interrupt_handler.cpp + arm/cpu_interrupt_handler.h arm/exclusive_monitor.cpp arm/exclusive_monitor.h arm/unicorn/arm_unicorn.cpp @@ -547,8 +549,6 @@ add_library(core STATIC hle/service/vi/vi_u.h hle/service/wlan/wlan.cpp hle/service/wlan/wlan.h - host_timing.cpp - host_timing.h loader/deconstructed_rom_directory.cpp loader/deconstructed_rom_directory.h loader/elf.cpp diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index cb2e640e2..87a1c29cc 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -18,11 +18,13 @@ enum class VMAPermission : u8; namespace Core { class System; +class CPUInterruptHandler; /// Generic ARMv8 CPU interface class ARM_Interface : NonCopyable { public: - explicit ARM_Interface(System& system_) : system{system_} {} + explicit ARM_Interface(System& system_, CPUInterruptHandler& interrupt_handler) + : system{system_}, interrupt_handler{interrupt_handler} {} virtual ~ARM_Interface() = default; struct ThreadContext32 { @@ -175,6 +177,7 @@ public: protected: /// System context that this ARM interface is running under. System& system; + CPUInterruptHandler& interrupt_handler; }; } // namespace Core diff --git a/src/core/arm/cpu_interrupt_handler.cpp b/src/core/arm/cpu_interrupt_handler.cpp new file mode 100644 index 000000000..2f1a1a269 --- /dev/null +++ b/src/core/arm/cpu_interrupt_handler.cpp @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/thread.h" +#include "core/arm/cpu_interrupt_handler.h" + +namespace Core { + +CPUInterruptHandler::CPUInterruptHandler() : is_interrupted{} { + interrupt_event = std::make_unique(); +} + +CPUInterruptHandler::~CPUInterruptHandler() = default; + +void CPUInterruptHandler::SetInterrupt(bool is_interrupted_) { + if (is_interrupted_) { + interrupt_event->Set(); + } + this->is_interrupted = is_interrupted_; +} + +void CPUInterruptHandler::AwaitInterrupt() { + interrupt_event->Wait(); +} + +} // namespace Core diff --git a/src/core/arm/cpu_interrupt_handler.h b/src/core/arm/cpu_interrupt_handler.h new file mode 100644 index 000000000..91c31a271 --- /dev/null +++ b/src/core/arm/cpu_interrupt_handler.h @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Common { +class Event; +} + +namespace Core { + +class CPUInterruptHandler { +public: + CPUInterruptHandler(); + ~CPUInterruptHandler(); + + CPUInterruptHandler(const CPUInterruptHandler&) = delete; + CPUInterruptHandler& operator=(const CPUInterruptHandler&) = delete; + + CPUInterruptHandler(CPUInterruptHandler&&) = default; + CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default; + + constexpr bool IsInterrupted() const { + return is_interrupted; + } + + void SetInterrupt(bool is_interrupted); + + void AwaitInterrupt(); + +private: + bool is_interrupted{}; + std::unique_ptr interrupt_event; +}; + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 4c8663d03..0b7aa6a69 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -114,9 +114,9 @@ void ARM_Dynarmic_32::Step() { jit->Step(); } -ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, - std::size_t core_index) - : ARM_Interface{system}, cb(std::make_unique(*this)), +ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterruptHandler& interrupt_handler, + ExclusiveMonitor& exclusive_monitor, std::size_t core_index) + : ARM_Interface{system, interrupt_handler}, cb(std::make_unique(*this)), cp15(std::make_shared(*this)), core_index{core_index}, exclusive_monitor{dynamic_cast(exclusive_monitor)} {} diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index e5b92d7bb..1e7e17e64 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -21,6 +21,7 @@ class Memory; namespace Core { +class CPUInterruptHandler; class DynarmicCallbacks32; class DynarmicCP15; class DynarmicExclusiveMonitor; @@ -28,7 +29,8 @@ class System; class ARM_Dynarmic_32 final : public ARM_Interface { public: - ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ARM_Dynarmic_32(System& system, CPUInterruptHandler& interrupt_handler, + ExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ARM_Dynarmic_32() override; void SetPC(u64 pc) override; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 5f5e36d94..5e316ffd4 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -9,6 +9,7 @@ #include "common/logging/log.h" #include "common/microprofile.h" #include "common/page_table.h" +#include "core/arm/cpu_interrupt_handler.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" #include "core/core.h" #include "core/core_manager.h" @@ -108,23 +109,16 @@ public: } void AddTicks(u64 ticks) override { - // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a - // rough approximation of the amount of executed ticks in the system, it may be thrown off - // if not all cores are doing a similar amount of work. Instead of doing this, we should - // device a way so that timing is consistent across all cores without increasing the ticks 4 - // times. - u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; - // Always execute at least one tick. - amortized_ticks = std::max(amortized_ticks, 1); - - parent.system.CoreTiming().AddTicks(amortized_ticks); - num_interpreted_instructions = 0; + /// We are using host timing, NOP } u64 GetTicksRemaining() override { - return std::max(parent.system.CoreTiming().GetDowncount(), s64{0}); + if (!parent.interrupt_handler.IsInterrupted()) { + return 1000ULL; + } + return 0ULL; } u64 GetCNTPCT() override { - return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); + return parent.system.CoreTiming().GetClockTicks(); } ARM_Dynarmic_64& parent; @@ -183,10 +177,10 @@ void ARM_Dynarmic_64::Step() { cb->InterpreterFallback(jit->GetPC(), 1); } -ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, - std::size_t core_index) - : ARM_Interface{system}, cb(std::make_unique(*this)), - inner_unicorn{system, ARM_Unicorn::Arch::AArch64}, core_index{core_index}, +ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterruptHandler& interrupt_handler, + ExclusiveMonitor& exclusive_monitor, std::size_t core_index) + : ARM_Interface{system, interrupt_handler}, cb(std::make_unique(*this)), + inner_unicorn{system, interrupt_handler, ARM_Unicorn::Arch::AArch64}, core_index{core_index}, exclusive_monitor{dynamic_cast(exclusive_monitor)} {} ARM_Dynarmic_64::~ARM_Dynarmic_64() = default; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 647cecaf0..9e94b58c2 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -22,12 +22,14 @@ class Memory; namespace Core { class DynarmicCallbacks64; +class CPUInterruptHandler; class DynarmicExclusiveMonitor; class System; class ARM_Dynarmic_64 final : public ARM_Interface { public: - ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ARM_Dynarmic_64(System& system, CPUInterruptHandler& interrupt_handler, + ExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ARM_Dynarmic_64() override; void SetPC(u64 pc) override; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index e40e9626a..0393fe641 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -6,6 +6,7 @@ #include #include "common/assert.h" #include "common/microprofile.h" +#include "core/arm/cpu_interrupt_handler.h" #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" #include "core/core_timing.h" @@ -62,7 +63,8 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si return false; } -ARM_Unicorn::ARM_Unicorn(System& system, Arch architecture) : ARM_Interface{system} { +ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture) + : ARM_Interface{system, interrupt_handler} { const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64; CHECKED(uc_open(arch, UC_MODE_ARM, &uc)); @@ -160,8 +162,12 @@ void ARM_Unicorn::Run() { if (GDBStub::IsServerEnabled()) { ExecuteInstructions(std::max(4000000U, 0U)); } else { - ExecuteInstructions( - std::max(std::size_t(system.CoreTiming().GetDowncount()), std::size_t{0})); + while (true) { + if (interrupt_handler.IsInterrupted()) { + return; + } + ExecuteInstructions(10); + } } } @@ -183,8 +189,6 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data())); CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions)); CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size())); - - system.CoreTiming().AddTicks(num_instructions); if (GDBStub::IsServerEnabled()) { if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) { uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 725c65085..0a4c087cd 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -11,6 +11,7 @@ namespace Core { +class CPUInterruptHandler; class System; class ARM_Unicorn final : public ARM_Interface { @@ -20,7 +21,7 @@ public: AArch64, // 64-bit ARM }; - explicit ARM_Unicorn(System& system, Arch architecture); + explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture); ~ARM_Unicorn() override; void SetPC(u64 pc) override; diff --git a/src/core/core.cpp b/src/core/core.cpp index f9f8a3000..e8936b09d 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -11,7 +11,6 @@ #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_manager.h" #include "core/core_timing.h" #include "core/cpu_manager.h" #include "core/device_memory.h" @@ -117,23 +116,30 @@ struct System::Impl { : kernel{system}, fs_controller{system}, memory{system}, cpu_manager{system}, reporter{system}, applet_manager{system} {} - CoreManager& CurrentCoreManager() { - return cpu_manager.GetCurrentCoreManager(); - } - Kernel::PhysicalCore& CurrentPhysicalCore() { - const auto index = cpu_manager.GetActiveCoreIndex(); - return kernel.PhysicalCore(index); + return kernel.CurrentPhysicalCore(); } Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) { return kernel.PhysicalCore(index); } - ResultStatus RunLoop(bool tight_loop) { + ResultStatus Run() { status = ResultStatus::Success; - cpu_manager.RunLoop(tight_loop); + kernel.Suspend(false); + core_timing.SyncPause(false); + cpu_manager.Pause(false); + + return status; + } + + ResultStatus Pause() { + status = ResultStatus::Success; + + kernel.Suspend(true); + core_timing.SyncPause(true); + cpu_manager.Pause(true); return status; } @@ -143,7 +149,7 @@ struct System::Impl { device_memory = std::make_unique(system); - core_timing.Initialize(); + core_timing.Initialize([&system]() { system.RegisterHostThread(); }); kernel.Initialize(); cpu_manager.Initialize(); @@ -387,20 +393,24 @@ struct System::Impl { System::System() : impl{std::make_unique(*this)} {} System::~System() = default; -CoreManager& System::CurrentCoreManager() { - return impl->CurrentCoreManager(); +CpuManager& System::GetCpuManager() { + return impl->cpu_manager; +} + +const CpuManager& System::GetCpuManager() const { + return impl->cpu_manager; } -const CoreManager& System::CurrentCoreManager() const { - return impl->CurrentCoreManager(); +System::ResultStatus System::Run() { + return impl->Run(); } -System::ResultStatus System::RunLoop(bool tight_loop) { - return impl->RunLoop(tight_loop); +System::ResultStatus System::Pause() { + return impl->Pause(); } System::ResultStatus System::SingleStep() { - return RunLoop(false); + return ResultStatus::Success; } void System::InvalidateCpuInstructionCaches() { @@ -444,7 +454,9 @@ const ARM_Interface& System::CurrentArmInterface() const { } std::size_t System::CurrentCoreIndex() const { - return impl->cpu_manager.GetActiveCoreIndex(); + std::size_t core = impl->kernel.GetCurrentHostThreadID(); + ASSERT(core < Core::Hardware::NUM_CPU_CORES); + return core; } Kernel::Scheduler& System::CurrentScheduler() { @@ -497,15 +509,6 @@ const ARM_Interface& System::ArmInterface(std::size_t core_index) const { return impl->GetPhysicalCore(core_index).ArmInterface(); } -CoreManager& System::GetCoreManager(std::size_t core_index) { - return impl->cpu_manager.GetCoreManager(core_index); -} - -const CoreManager& System::GetCoreManager(std::size_t core_index) const { - ASSERT(core_index < NUM_CPU_CORES); - return impl->cpu_manager.GetCoreManager(core_index); -} - ExclusiveMonitor& System::Monitor() { return impl->kernel.GetExclusiveMonitor(); } diff --git a/src/core/core.h b/src/core/core.h index acc53d6a1..7f170fc54 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -90,7 +90,7 @@ class InterruptManager; namespace Core { class ARM_Interface; -class CoreManager; +class CpuManager; class DeviceMemory; class ExclusiveMonitor; class FrameLimiter; @@ -136,16 +136,18 @@ public: }; /** - * Run the core CPU loop - * This function runs the core for the specified number of CPU instructions before trying to - * update hardware. This is much faster than SingleStep (and should be equivalent), as the CPU - * is not required to do a full dispatch with each instruction. NOTE: the number of instructions - * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware - * update is requested (e.g. on a thread switch). - * @param tight_loop If false, the CPU single-steps. - * @return Result status, indicating whether or not the operation succeeded. + * Run the OS and Application + * This function will start emulation and run the competent devices + */ + ResultStatus Run(); + + /** + * Pause the OS and Application + * This function will pause emulation and stop the competent devices */ - ResultStatus RunLoop(bool tight_loop = true); + ResultStatus Pause(); + + /** * Step the CPU one instruction @@ -215,11 +217,9 @@ public: /// Gets a const reference to an ARM interface from the CPU core with the specified index const ARM_Interface& ArmInterface(std::size_t core_index) const; - /// Gets a CPU interface to the CPU core with the specified index - CoreManager& GetCoreManager(std::size_t core_index); + CpuManager& GetCpuManager(); - /// Gets a CPU interface to the CPU core with the specified index - const CoreManager& GetCoreManager(std::size_t core_index) const; + const CpuManager& GetCpuManager() const; /// Gets a reference to the exclusive monitor ExclusiveMonitor& Monitor(); @@ -373,12 +373,6 @@ public: private: System(); - /// Returns the currently running CPU core - CoreManager& CurrentCoreManager(); - - /// Returns the currently running CPU core - const CoreManager& CurrentCoreManager() const; - /** * Initialize the emulated system. * @param emu_window Reference to the host-system window used for video output and keyboard diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp index b6b797c80..45f0bb547 100644 --- a/src/core/core_manager.cpp +++ b/src/core/core_manager.cpp @@ -34,7 +34,6 @@ void CoreManager::RunLoop(bool tight_loop) { // instead advance to the next event and try to yield to the next thread if (Kernel::GetCurrentThread() == nullptr) { LOG_TRACE(Core, "Core-{} idling", core_index); - core_timing.Idle(); } else { if (tight_loop) { physical_core.Run(); @@ -42,7 +41,6 @@ void CoreManager::RunLoop(bool tight_loop) { physical_core.Step(); } } - core_timing.Advance(); Reschedule(); } @@ -59,7 +57,7 @@ void CoreManager::Reschedule() { // Lock the global kernel mutex when we manipulate the HLE state std::lock_guard lock(HLE::g_hle_lock); - global_scheduler.SelectThread(core_index); + // global_scheduler.SelectThread(core_index); physical_core.Scheduler().TryDoContextSwitch(); } diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 46d4178c4..a3ce69790 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -1,5 +1,5 @@ -// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project -// Licensed under GPLv2+ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include "core/core_timing.h" @@ -10,20 +10,16 @@ #include #include "common/assert.h" -#include "common/thread.h" #include "core/core_timing_util.h" -#include "core/hardware_properties.h" namespace Core::Timing { -constexpr int MAX_SLICE_LENGTH = 10000; - std::shared_ptr CreateEvent(std::string name, TimedCallback&& callback) { return std::make_shared(std::move(callback), std::move(name)); } struct CoreTiming::Event { - s64 time; + u64 time; u64 fifo_order; u64 userdata; std::weak_ptr type; @@ -39,51 +35,74 @@ struct CoreTiming::Event { } }; -CoreTiming::CoreTiming() = default; -CoreTiming::~CoreTiming() = default; +CoreTiming::CoreTiming() { + clock = + Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ); +} -void CoreTiming::Initialize() { - downcounts.fill(MAX_SLICE_LENGTH); - time_slice.fill(MAX_SLICE_LENGTH); - slice_length = MAX_SLICE_LENGTH; - global_timer = 0; - idled_cycles = 0; - current_context = 0; +CoreTiming::~CoreTiming() = default; - // The time between CoreTiming being initialized and the first call to Advance() is considered - // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before - // executing the first cycle of each slice to prepare the slice length and downcount for - // that slice. - is_global_timer_sane = true; +void CoreTiming::ThreadEntry(CoreTiming& instance) { + std::string name = "yuzu:HostTiming"; + Common::SetCurrentThreadName(name.c_str()); + instance.on_thread_init(); + instance.ThreadLoop(); +} +void CoreTiming::Initialize(std::function&& on_thread_init_) { + on_thread_init = std::move(on_thread_init_); event_fifo_id = 0; - const auto empty_timed_callback = [](u64, s64) {}; ev_lost = CreateEvent("_lost_event", empty_timed_callback); + timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); } void CoreTiming::Shutdown() { + paused = true; + shutting_down = true; + event.Set(); + timer_thread->join(); ClearPendingEvents(); + timer_thread.reset(); + has_started = false; } -void CoreTiming::ScheduleEvent(s64 cycles_into_future, const std::shared_ptr& event_type, - u64 userdata) { - std::lock_guard guard{inner_mutex}; - const s64 timeout = GetTicks() + cycles_into_future; +void CoreTiming::Pause(bool is_paused) { + paused = is_paused; +} - // If this event needs to be scheduled before the next advance(), force one early - if (!is_global_timer_sane) { - ForceExceptionCheck(cycles_into_future); +void CoreTiming::SyncPause(bool is_paused) { + if (is_paused == paused && paused_set == paused) { + return; } + Pause(is_paused); + event.Set(); + while (paused_set != is_paused) + ; +} + +bool CoreTiming::IsRunning() const { + return !paused_set; +} + +bool CoreTiming::HasPendingEvents() const { + return !(wait_set && event_queue.empty()); +} + +void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr& event_type, + u64 userdata) { + basic_lock.lock(); + const u64 timeout = static_cast(GetGlobalTimeNs().count() + ns_into_future); event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + basic_lock.unlock(); + event.Set(); } void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u64 userdata) { - std::lock_guard guard{inner_mutex}; - + basic_lock.lock(); const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get() && e.userdata == userdata; }); @@ -93,23 +112,23 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u event_queue.erase(itr, event_queue.end()); std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); } + basic_lock.unlock(); } -u64 CoreTiming::GetTicks() const { - u64 ticks = static_cast(global_timer); - if (!is_global_timer_sane) { - ticks += accumulated_ticks; - } - return ticks; +void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) { + ticks_count[core_index] += ticks; +} + +void CoreTiming::ResetTicks(std::size_t core_index) { + ticks_count[core_index] = 0; } -u64 CoreTiming::GetIdleTicks() const { - return static_cast(idled_cycles); +u64 CoreTiming::GetCPUTicks() const { + return clock->GetCPUCycles(); } -void CoreTiming::AddTicks(u64 ticks) { - accumulated_ticks += ticks; - downcounts[current_context] -= static_cast(ticks); +u64 CoreTiming::GetClockTicks() const { + return clock->GetClockCycles(); } void CoreTiming::ClearPendingEvents() { @@ -117,7 +136,7 @@ void CoreTiming::ClearPendingEvents() { } void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { - std::lock_guard guard{inner_mutex}; + basic_lock.lock(); const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get(); @@ -128,99 +147,64 @@ void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { event_queue.erase(itr, event_queue.end()); std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); } + basic_lock.unlock(); } -void CoreTiming::ForceExceptionCheck(s64 cycles) { - cycles = std::max(0, cycles); - if (downcounts[current_context] <= cycles) { - return; - } - - // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int - // here. Account for cycles already executed by adjusting the g.slice_length - downcounts[current_context] = static_cast(cycles); -} - -std::optional CoreTiming::NextAvailableCore(const s64 needed_ticks) const { - const u64 original_context = current_context; - u64 next_context = (original_context + 1) % num_cpu_cores; - while (next_context != original_context) { - if (time_slice[next_context] >= needed_ticks) { - return {next_context}; - } else if (time_slice[next_context] >= 0) { - return std::nullopt; - } - next_context = (next_context + 1) % num_cpu_cores; - } - return std::nullopt; -} - -void CoreTiming::Advance() { - std::unique_lock guard(inner_mutex); - - const u64 cycles_executed = accumulated_ticks; - time_slice[current_context] = std::max(0, time_slice[current_context] - accumulated_ticks); - global_timer += cycles_executed; - - is_global_timer_sane = true; +std::optional CoreTiming::Advance() { + advance_lock.lock(); + basic_lock.lock(); + global_timer = GetGlobalTimeNs().count(); while (!event_queue.empty() && event_queue.front().time <= global_timer) { Event evt = std::move(event_queue.front()); std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); event_queue.pop_back(); - inner_mutex.unlock(); + basic_lock.unlock(); if (auto event_type{evt.type.lock()}) { event_type->callback(evt.userdata, global_timer - evt.time); } - inner_mutex.lock(); + basic_lock.lock(); } - is_global_timer_sane = false; - - // Still events left (scheduled in the future) if (!event_queue.empty()) { - const s64 needed_ticks = - std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); - const auto next_core = NextAvailableCore(needed_ticks); - if (next_core) { - downcounts[*next_core] = needed_ticks; - } + const u64 next_time = event_queue.front().time - global_timer; + basic_lock.unlock(); + advance_lock.unlock(); + return next_time; + } else { + basic_lock.unlock(); + advance_lock.unlock(); + return std::nullopt; } - - accumulated_ticks = 0; - - downcounts[current_context] = time_slice[current_context]; } -void CoreTiming::ResetRun() { - downcounts.fill(MAX_SLICE_LENGTH); - time_slice.fill(MAX_SLICE_LENGTH); - current_context = 0; - // Still events left (scheduled in the future) - if (!event_queue.empty()) { - const s64 needed_ticks = - std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); - downcounts[current_context] = needed_ticks; +void CoreTiming::ThreadLoop() { + has_started = true; + while (!shutting_down) { + while (!paused) { + paused_set = false; + const auto next_time = Advance(); + if (next_time) { + std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time); + event.WaitFor(next_time_ns); + } else { + wait_set = true; + event.Wait(); + } + wait_set = false; + } + paused_set = true; } - - is_global_timer_sane = false; - accumulated_ticks = 0; } -void CoreTiming::Idle() { - accumulated_ticks += downcounts[current_context]; - idled_cycles += downcounts[current_context]; - downcounts[current_context] = 0; +std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { + return clock->GetTimeNS(); } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE}; -} - -s64 CoreTiming::GetDowncount() const { - return downcounts[current_context]; + return clock->GetTimeUS(); } } // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index d50f4eb8a..707c8ef0c 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -1,19 +1,25 @@ -// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project -// Licensed under GPLv2+ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once +#include #include #include #include #include #include #include +#include #include #include "common/common_types.h" +#include "common/spin_lock.h" +#include "common/thread.h" #include "common/threadsafe_queue.h" +#include "common/wall_clock.h" +#include "core/hardware_properties.h" namespace Core::Timing { @@ -56,58 +62,55 @@ public: /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is /// required to end slice - 1 and start slice 0 before the first cycle of code is executed. - void Initialize(); + void Initialize(std::function&& on_thread_init_); /// Tears down all timing related functionality. void Shutdown(); - /// After the first Advance, the slice lengths and the downcount will be reduced whenever an - /// event is scheduled earlier than the current values. - /// - /// Scheduling from a callback will not update the downcount until the Advance() completes. - void ScheduleEvent(s64 cycles_into_future, const std::shared_ptr& event_type, - u64 userdata = 0); + /// Pauses/Unpauses the execution of the timer thread. + void Pause(bool is_paused); - void UnscheduleEvent(const std::shared_ptr& event_type, u64 userdata); + /// Pauses/Unpauses the execution of the timer thread and waits until paused. + void SyncPause(bool is_paused); - /// We only permit one event of each type in the queue at a time. - void RemoveEvent(const std::shared_ptr& event_type); + /// Checks if core timing is running. + bool IsRunning() const; - void ForceExceptionCheck(s64 cycles); + /// Checks if the timer thread has started. + bool HasStarted() const { + return has_started; + } - /// This should only be called from the emu thread, if you are calling it any other thread, - /// you are doing something evil - u64 GetTicks() const; + /// Checks if there are any pending time events. + bool HasPendingEvents() const; - u64 GetIdleTicks() const; + /// Schedules an event in core timing + void ScheduleEvent(s64 ns_into_future, const std::shared_ptr& event_type, + u64 userdata = 0); - void AddTicks(u64 ticks); + void UnscheduleEvent(const std::shared_ptr& event_type, u64 userdata); - /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends - /// the previous timing slice and begins the next one, you must Advance from the previous - /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an - /// Advance() is required to initialize the slice length before the first cycle of emulated - /// instructions is executed. - void Advance(); + /// We only permit one event of each type in the queue at a time. + void RemoveEvent(const std::shared_ptr& event_type); - /// Pretend that the main CPU has executed enough cycles to reach the next event. - void Idle(); + void AddTicks(std::size_t core_index, u64 ticks); - std::chrono::microseconds GetGlobalTimeUs() const; + void ResetTicks(std::size_t core_index); - void ResetRun(); + /// Returns current time in emulated CPU cycles + u64 GetCPUTicks() const; - s64 GetDowncount() const; + /// Returns current time in emulated in Clock cycles + u64 GetClockTicks() const; - void SwitchContext(u64 new_context) { - current_context = new_context; - } + /// Returns current time in microseconds. + std::chrono::microseconds GetGlobalTimeUs() const; - bool CanCurrentContextRun() const { - return time_slice[current_context] > 0; - } + /// Returns current time in nanoseconds. + std::chrono::nanoseconds GetGlobalTimeNs() const; - std::optional NextAvailableCore(const s64 needed_ticks) const; + /// Checks for events manually and returns time in nanoseconds for next event, threadsafe. + std::optional Advance(); private: struct Event; @@ -115,21 +118,14 @@ private: /// Clear all pending events. This should ONLY be done on exit. void ClearPendingEvents(); - static constexpr u64 num_cpu_cores = 4; + static void ThreadEntry(CoreTiming& instance); + void ThreadLoop(); + + std::unique_ptr clock; - s64 global_timer = 0; - s64 idled_cycles = 0; - s64 slice_length = 0; - u64 accumulated_ticks = 0; - std::array downcounts{}; - // Slice of time assigned to each core per run. - std::array time_slice{}; - u64 current_context = 0; + u64 global_timer = 0; - // Are we in a function that has been called from Advance() - // If events are scheduled from a function that gets called from Advance(), - // don't change slice_length and downcount. - bool is_global_timer_sane = false; + std::chrono::nanoseconds start_point; // The queue is a min-heap using std::make_heap/push_heap/pop_heap. // We don't use std::priority_queue because we need to be able to serialize, unserialize and @@ -139,8 +135,18 @@ private: u64 event_fifo_id = 0; std::shared_ptr ev_lost; - - std::mutex inner_mutex; + Common::Event event{}; + Common::SpinLock basic_lock{}; + Common::SpinLock advance_lock{}; + std::unique_ptr timer_thread; + std::atomic paused{}; + std::atomic paused_set{}; + std::atomic wait_set{}; + std::atomic shutting_down{}; + std::atomic has_started{}; + std::function on_thread_init{}; + + std::array, Core::Hardware::NUM_CPU_CORES> ticks_count{}; }; /// Creates a core timing event with the given name and callback. diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 70ddbdcca..494850992 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -2,80 +2,192 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/fiber.h" +#include "common/thread.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_manager.h" #include "core/core_timing.h" #include "core/cpu_manager.h" #include "core/gdbstub/gdbstub.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" namespace Core { CpuManager::CpuManager(System& system) : system{system} {} CpuManager::~CpuManager() = default; +void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) { + cpu_manager.RunThread(core); +} + void CpuManager::Initialize() { - for (std::size_t index = 0; index < core_managers.size(); ++index) { - core_managers[index] = std::make_unique(system, index); + running_mode = true; + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + core_data[core].host_thread = + std::make_unique(ThreadStart, std::ref(*this), core); } } void CpuManager::Shutdown() { - for (auto& cpu_core : core_managers) { - cpu_core.reset(); + running_mode = false; + Pause(false); + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + core_data[core].host_thread->join(); } } -CoreManager& CpuManager::GetCoreManager(std::size_t index) { - return *core_managers.at(index); +void CpuManager::GuestThreadFunction(void* cpu_manager_) { + CpuManager* cpu_manager = static_cast(cpu_manager_); + cpu_manager->RunGuestThread(); } -const CoreManager& CpuManager::GetCoreManager(std::size_t index) const { - return *core_managers.at(index); +void CpuManager::IdleThreadFunction(void* cpu_manager_) { + CpuManager* cpu_manager = static_cast(cpu_manager_); + cpu_manager->RunIdleThread(); } -CoreManager& CpuManager::GetCurrentCoreManager() { - // Otherwise, use single-threaded mode active_core variable - return *core_managers[active_core]; +void CpuManager::SuspendThreadFunction(void* cpu_manager_) { + CpuManager* cpu_manager = static_cast(cpu_manager_); + cpu_manager->RunSuspendThread(); } -const CoreManager& CpuManager::GetCurrentCoreManager() const { - // Otherwise, use single-threaded mode active_core variable - return *core_managers[active_core]; +std::function CpuManager::GetGuestThreadStartFunc() { + return std::function(GuestThreadFunction); } -void CpuManager::RunLoop(bool tight_loop) { - if (GDBStub::IsServerEnabled()) { - GDBStub::HandlePacket(); - - // If the loop is halted and we want to step, use a tiny (1) number of instructions to - // execute. Otherwise, get out of the loop function. - if (GDBStub::GetCpuHaltFlag()) { - if (GDBStub::GetCpuStepFlag()) { - tight_loop = false; - } else { - return; - } - } +std::function CpuManager::GetIdleThreadStartFunc() { + return std::function(IdleThreadFunction); +} + +std::function CpuManager::GetSuspendThreadStartFunc() { + return std::function(SuspendThreadFunction); +} + +void* CpuManager::GetStartFuncParamater() { + return static_cast(this); +} + +void CpuManager::RunGuestThread() { + auto& kernel = system.Kernel(); + { + auto& sched = kernel.CurrentScheduler(); + sched.OnThreadStart(); + } + while (true) { + auto& physical_core = kernel.CurrentPhysicalCore(); + LOG_CRITICAL(Core_ARM, "Running Guest Thread"); + physical_core.Idle(); + LOG_CRITICAL(Core_ARM, "Leaving Guest Thread"); + // physical_core.Run(); + auto& scheduler = physical_core.Scheduler(); + scheduler.TryDoContextSwitch(); } +} - auto& core_timing = system.CoreTiming(); - core_timing.ResetRun(); - bool keep_running{}; - do { - keep_running = false; - for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { - core_timing.SwitchContext(active_core); - if (core_timing.CanCurrentContextRun()) { - core_managers[active_core]->RunLoop(tight_loop); +void CpuManager::RunIdleThread() { + auto& kernel = system.Kernel(); + while (true) { + auto& physical_core = kernel.CurrentPhysicalCore(); + LOG_CRITICAL(Core_ARM, "Running Idle Thread"); + physical_core.Idle(); + auto& scheduler = physical_core.Scheduler(); + scheduler.TryDoContextSwitch(); + } +} + +void CpuManager::RunSuspendThread() { + LOG_CRITICAL(Core_ARM, "Suspending Thread Entered"); + auto& kernel = system.Kernel(); + { + auto& sched = kernel.CurrentScheduler(); + sched.OnThreadStart(); + } + while (true) { + auto core = kernel.GetCurrentHostThreadID(); + auto& scheduler = kernel.CurrentScheduler(); + Kernel::Thread* current_thread = scheduler.GetCurrentThread(); + LOG_CRITICAL(Core_ARM, "Suspending Core {}", core); + Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context); + LOG_CRITICAL(Core_ARM, "Unsuspending Core {}", core); + ASSERT(scheduler.ContextSwitchPending()); + ASSERT(core == kernel.GetCurrentHostThreadID()); + scheduler.TryDoContextSwitch(); + } +} + +void CpuManager::Pause(bool paused) { + if (!paused) { + bool all_not_barrier = false; + while (!all_not_barrier) { + all_not_barrier = true; + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + all_not_barrier &= + !core_data[core].is_running.load() && core_data[core].initialized.load(); } - keep_running |= core_timing.CanCurrentContextRun(); } - } while (keep_running); + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + core_data[core].enter_barrier->Set(); + } + if (paused_state.load()) { + bool all_barrier = false; + while (!all_barrier) { + all_barrier = true; + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + all_barrier &= + core_data[core].is_paused.load() && core_data[core].initialized.load(); + } + } + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + core_data[core].exit_barrier->Set(); + } + } + } else { + /// Wait until all cores are paused. + bool all_barrier = false; + while (!all_barrier) { + all_barrier = true; + for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + all_barrier &= + core_data[core].is_paused.load() && core_data[core].initialized.load(); + } + } + /// Don't release the barrier + } + paused_state = paused; +} - if (GDBStub::IsServerEnabled()) { - GDBStub::SetCpuStepFlag(false); +void CpuManager::RunThread(std::size_t core) { + /// Initialization + system.RegisterCoreThread(core); + std::string name = "yuzu:CoreHostThread_" + std::to_string(core); + Common::SetCurrentThreadName(name.c_str()); + auto& data = core_data[core]; + data.enter_barrier = std::make_unique(); + data.exit_barrier = std::make_unique(); + data.host_context = Common::Fiber::ThreadToFiber(); + data.is_running = false; + data.initialized = true; + /// Running + while (running_mode) { + data.is_running = false; + data.enter_barrier->Wait(); + auto& scheduler = system.Kernel().CurrentScheduler(); + Kernel::Thread* current_thread = scheduler.GetCurrentThread(); + data.is_running = true; + Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext()); + data.is_running = false; + data.is_paused = true; + data.exit_barrier->Wait(); + data.is_paused = false; } + /// Time to cleanup + data.host_context->Exit(); + data.enter_barrier.reset(); + data.exit_barrier.reset(); + data.initialized = false; } } // namespace Core diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h index 97554d1bb..8103ae857 100644 --- a/src/core/cpu_manager.h +++ b/src/core/cpu_manager.h @@ -5,12 +5,18 @@ #pragma once #include +#include #include +#include #include "core/hardware_properties.h" +namespace Common { +class Event; +class Fiber; +} // namespace Common + namespace Core { -class CoreManager; class System; class CpuManager { @@ -27,21 +33,40 @@ public: void Initialize(); void Shutdown(); - CoreManager& GetCoreManager(std::size_t index); - const CoreManager& GetCoreManager(std::size_t index) const; + void Pause(bool paused); + + std::function GetGuestThreadStartFunc(); + std::function GetIdleThreadStartFunc(); + std::function GetSuspendThreadStartFunc(); + void* GetStartFuncParamater(); - CoreManager& GetCurrentCoreManager(); - const CoreManager& GetCurrentCoreManager() const; +private: + static void GuestThreadFunction(void* cpu_manager); + static void IdleThreadFunction(void* cpu_manager); + static void SuspendThreadFunction(void* cpu_manager); - std::size_t GetActiveCoreIndex() const { - return active_core; - } + void RunGuestThread(); + void RunIdleThread(); + void RunSuspendThread(); - void RunLoop(bool tight_loop); + static void ThreadStart(CpuManager& cpu_manager, std::size_t core); -private: - std::array, Hardware::NUM_CPU_CORES> core_managers; - std::size_t active_core{}; ///< Active core, only used in single thread mode + void RunThread(std::size_t core); + + struct CoreData { + std::shared_ptr host_context; + std::unique_ptr enter_barrier; + std::unique_ptr exit_barrier; + std::atomic is_running; + std::atomic is_paused; + std::atomic initialized; + std::unique_ptr host_thread; + }; + + std::atomic running_mode{}; + std::atomic paused_state{}; + + std::array core_data{}; System& system; }; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 7655382fa..ba051a7d8 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -13,11 +13,13 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/thread.h" #include "core/arm/arm_interface.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/cpu_manager.h" #include "core/device_memory.h" #include "core/hardware_properties.h" #include "core/hle/kernel/client_port.h" @@ -117,7 +119,9 @@ struct KernelCore::Impl { InitializeSystemResourceLimit(kernel); InitializeMemoryLayout(); InitializeThreads(); - InitializePreemption(); + InitializePreemption(kernel); + InitializeSchedulers(); + InitializeSuspendThreads(); } void Shutdown() { @@ -155,6 +159,12 @@ struct KernelCore::Impl { } } + void InitializeSchedulers() { + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + cores[i].Scheduler().Initialize(); + } + } + // Creates the default system resource limit void InitializeSystemResourceLimit(KernelCore& kernel) { system_resource_limit = ResourceLimit::Create(kernel); @@ -178,10 +188,13 @@ struct KernelCore::Impl { Core::Timing::CreateEvent("ThreadWakeupCallback", ThreadWakeupCallback); } - void InitializePreemption() { - preemption_event = - Core::Timing::CreateEvent("PreemptionCallback", [this](u64 userdata, s64 cycles_late) { - global_scheduler.PreemptThreads(); + void InitializePreemption(KernelCore& kernel) { + preemption_event = Core::Timing::CreateEvent( + "PreemptionCallback", [this, &kernel](u64 userdata, s64 cycles_late) { + { + SchedulerLock lock(kernel); + global_scheduler.PreemptThreads(); + } s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); system.CoreTiming().ScheduleEvent(time_interval, preemption_event); }); @@ -190,6 +203,20 @@ struct KernelCore::Impl { system.CoreTiming().ScheduleEvent(time_interval, preemption_event); } + void InitializeSuspendThreads() { + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + std::string name = "Suspend Thread Id:" + std::to_string(i); + std::function init_func = + system.GetCpuManager().GetSuspendThreadStartFunc(); + void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); + ThreadType type = + static_cast(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_SUSPEND); + auto thread_res = Thread::Create(system, type, name, 0, 0, 0, static_cast(i), 0, + nullptr, std::move(init_func), init_func_parameter); + suspend_threads[i] = std::move(thread_res).Unwrap(); + } + } + void MakeCurrentProcess(Process* process) { current_process = process; @@ -201,7 +228,10 @@ struct KernelCore::Impl { core.SetIs64Bit(process->Is64BitProcess()); } - system.Memory().SetCurrentPageTable(*process); + u32 core_id = GetCurrentHostThreadID(); + if (core_id < Core::Hardware::NUM_CPU_CORES) { + system.Memory().SetCurrentPageTable(*process, core_id); + } } void RegisterCoreThread(std::size_t core_id) { @@ -219,7 +249,9 @@ struct KernelCore::Impl { std::unique_lock lock{register_thread_mutex}; const std::thread::id this_id = std::this_thread::get_id(); const auto it = host_thread_ids.find(this_id); - ASSERT(it == host_thread_ids.end()); + if (it != host_thread_ids.end()) { + return; + } host_thread_ids[this_id] = registered_thread_ids++; } @@ -343,6 +375,8 @@ struct KernelCore::Impl { std::shared_ptr irs_shared_mem; std::shared_ptr time_shared_mem; + std::array, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; + // System context Core::System& system; }; @@ -412,6 +446,26 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const { return impl->cores[id]; } +Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() { + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return impl->cores[core_id]; +} + +const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { + u32 core_id = impl->GetCurrentHostThreadID(); + ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); + return impl->cores[core_id]; +} + +Kernel::Scheduler& KernelCore::CurrentScheduler() { + return CurrentPhysicalCore().Scheduler(); +} + +const Kernel::Scheduler& KernelCore::CurrentScheduler() const { + return CurrentPhysicalCore().Scheduler(); +} + Kernel::Synchronization& KernelCore::Synchronization() { return impl->synchronization; } @@ -557,4 +611,20 @@ const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const { return *impl->time_shared_mem; } +void KernelCore::Suspend(bool in_suspention) { + const bool should_suspend = exception_exited || in_suspention; + { + SchedulerLock lock(*this); + ThreadStatus status = should_suspend ? ThreadStatus::Ready : ThreadStatus::WaitSleep; + for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { + impl->suspend_threads[i]->SetStatus(status); + } + } +} + +void KernelCore::ExceptionalExit() { + exception_exited = true; + Suspend(true); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 83de1f542..5d32a8329 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -110,6 +110,18 @@ public: /// Gets the an instance of the respective physical CPU core. const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; + /// Gets the sole instance of the Scheduler at the current running core. + Kernel::Scheduler& CurrentScheduler(); + + /// Gets the sole instance of the Scheduler at the current running core. + const Kernel::Scheduler& CurrentScheduler() const; + + /// Gets the an instance of the current physical CPU core. + Kernel::PhysicalCore& CurrentPhysicalCore(); + + /// Gets the an instance of the current physical CPU core. + const Kernel::PhysicalCore& CurrentPhysicalCore() const; + /// Gets the an instance of the Synchronization Interface. Kernel::Synchronization& Synchronization(); @@ -191,6 +203,12 @@ public: /// Gets the shared memory object for Time services. const Kernel::SharedMemory& GetTimeSharedMem() const; + /// Suspend/unsuspend the OS. + void Suspend(bool in_suspention); + + /// Exceptional exit the OS. + void ExceptionalExit(); + private: friend class Object; friend class Process; @@ -219,6 +237,7 @@ private: struct Impl; std::unique_ptr impl; + bool exception_exited{}; }; } // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index a15011076..69202540b 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -2,12 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/assert.h" #include "common/logging/log.h" +#include "common/spin_lock.h" #include "core/arm/arm_interface.h" #ifdef ARCHITECTURE_x86_64 #include "core/arm/dynarmic/arm_dynarmic_32.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" #endif +#include "core/arm/cpu_interrupt_handler.h" #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" @@ -19,21 +22,23 @@ namespace Kernel { PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor) - : core_index{id} { + : interrupt_handler{}, core_index{id} { #ifdef ARCHITECTURE_x86_64 - arm_interface_32 = - std::make_unique(system, exclusive_monitor, core_index); - arm_interface_64 = - std::make_unique(system, exclusive_monitor, core_index); - + arm_interface_32 = std::make_unique(system, interrupt_handler, + exclusive_monitor, core_index); + arm_interface_64 = std::make_unique(system, interrupt_handler, + exclusive_monitor, core_index); #else using Core::ARM_Unicorn; - arm_interface_32 = std::make_unique(system, ARM_Unicorn::Arch::AArch32); - arm_interface_64 = std::make_unique(system, ARM_Unicorn::Arch::AArch64); + arm_interface_32 = + std::make_unique(system, interrupt_handler, ARM_Unicorn::Arch::AArch32); + arm_interface_64 = + std::make_unique(system, interrupt_handler, ARM_Unicorn::Arch::AArch64); LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); #endif scheduler = std::make_unique(system, core_index); + guard = std::make_unique(); } PhysicalCore::~PhysicalCore() = default; @@ -47,6 +52,10 @@ void PhysicalCore::Step() { arm_interface->Step(); } +void PhysicalCore::Idle() { + interrupt_handler.AwaitInterrupt(); +} + void PhysicalCore::Stop() { arm_interface->PrepareReschedule(); } @@ -63,4 +72,16 @@ void PhysicalCore::SetIs64Bit(bool is_64_bit) { } } +void PhysicalCore::Interrupt() { + guard->lock(); + interrupt_handler.SetInterrupt(true); + guard->unlock(); +} + +void PhysicalCore::ClearInterrupt() { + guard->lock(); + interrupt_handler.SetInterrupt(false); + guard->unlock(); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 3269166be..c3da30b72 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -7,6 +7,12 @@ #include #include +#include "core/arm/cpu_interrupt_handler.h" + +namespace Common { + class SpinLock; +} + namespace Kernel { class Scheduler; } // namespace Kernel @@ -32,11 +38,24 @@ public: /// Execute current jit state void Run(); + /// Set this core in IdleState. + void Idle(); /// Execute a single instruction in current jit. void Step(); /// Stop JIT execution/exit void Stop(); + /// Interrupt this physical core. + void Interrupt(); + + /// Clear this core's interrupt + void ClearInterrupt(); + + /// Check if this core is interrupted + bool IsInterrupted() const { + return interrupt_handler.IsInterrupted(); + } + // Shutdown this physical core. void Shutdown(); @@ -71,11 +90,13 @@ public: void SetIs64Bit(bool is_64_bit); private: + Core::CPUInterruptHandler interrupt_handler; std::size_t core_index; std::unique_ptr arm_interface_32; std::unique_ptr arm_interface_64; std::unique_ptr scheduler; Core::ARM_Interface* arm_interface{}; + std::unique_ptr guard; }; } // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index c4c5199b1..7e26a54f4 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -30,14 +30,15 @@ namespace { /** * Sets up the primary application thread * + * @param system The system instance to create the main thread under. * @param owner_process The parent process for the main thread - * @param kernel The kernel instance to create the main thread under. * @param priority The priority to give the main thread */ -void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, VAddr stack_top) { +void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) { const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); - auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, - owner_process.GetIdealCore(), stack_top, owner_process); + ThreadType type = THREADTYPE_USER; + auto thread_res = Thread::Create(system, type, "main", entry_point, priority, 0, + owner_process.GetIdealCore(), stack_top, &owner_process); std::shared_ptr thread = std::move(thread_res).Unwrap(); @@ -48,8 +49,12 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, V thread->GetContext32().cpu_registers[1] = thread_handle; thread->GetContext64().cpu_registers[1] = thread_handle; + auto& kernel = system.Kernel(); // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires - thread->ResumeFromWait(); + { + SchedulerLock lock{kernel}; + thread->SetStatus(ThreadStatus::Ready); + } } } // Anonymous namespace @@ -294,7 +299,7 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) { ChangeStatus(ProcessStatus::Running); - SetupMainThread(*this, kernel, main_thread_priority, main_thread_stack_top); + SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top); resource_limit->Reserve(ResourceType::Threads, 1); resource_limit->Reserve(ResourceType::PhysicalMemory, main_thread_stack_size); } diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 1140c72a3..5166020a0 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -11,11 +11,15 @@ #include #include "common/assert.h" +#include "common/bit_util.h" +#include "common/fiber.h" #include "common/logging/log.h" #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/cpu_manager.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/time_manager.h" @@ -27,78 +31,108 @@ GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {} GlobalScheduler::~GlobalScheduler() = default; void GlobalScheduler::AddThread(std::shared_ptr thread) { + global_list_guard.lock(); thread_list.push_back(std::move(thread)); + global_list_guard.unlock(); } void GlobalScheduler::RemoveThread(std::shared_ptr thread) { + global_list_guard.lock(); thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), thread_list.end()); + global_list_guard.unlock(); } -void GlobalScheduler::UnloadThread(std::size_t core) { - Scheduler& sched = kernel.Scheduler(core); - sched.UnloadThread(); -} - -void GlobalScheduler::SelectThread(std::size_t core) { +u32 GlobalScheduler::SelectThreads() { const auto update_thread = [](Thread* thread, Scheduler& sched) { + sched.guard.lock(); if (thread != sched.selected_thread.get()) { if (thread == nullptr) { ++sched.idle_selection_count; } sched.selected_thread = SharedFrom(thread); } - sched.is_context_switch_pending = sched.selected_thread != sched.current_thread; + const bool reschedule_pending = sched.selected_thread != sched.current_thread; + sched.is_context_switch_pending = reschedule_pending; std::atomic_thread_fence(std::memory_order_seq_cst); + sched.guard.unlock(); + return reschedule_pending; }; - Scheduler& sched = kernel.Scheduler(core); - Thread* current_thread = nullptr; - // Step 1: Get top thread in schedule queue. - current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); - if (current_thread) { - update_thread(current_thread, sched); - return; + if (!is_reselection_pending.load()) { + return 0; } - // Step 2: Try selecting a suggested thread. - Thread* winner = nullptr; - std::set sug_cores; - for (auto thread : suggested_queue[core]) { - s32 this_core = thread->GetProcessorID(); - Thread* thread_on_core = nullptr; - if (this_core >= 0) { - thread_on_core = scheduled_queue[this_core].front(); - } - if (this_core < 0 || thread != thread_on_core) { - winner = thread; - break; + std::array top_threads{}; + + u32 idle_cores{}; + + // Step 1: Get top thread in schedule queue. + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + Thread* top_thread = + scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); + if (top_thread != nullptr) { + // TODO(Blinkhawk): Implement Thread Pinning + } else { + idle_cores |= (1ul << core); } - sug_cores.insert(this_core); + top_threads[core] = top_thread; } - // if we got a suggested thread, select it, else do a second pass. - if (winner && winner->GetPriority() > 2) { - if (winner->IsRunning()) { - UnloadThread(static_cast(winner->GetProcessorID())); + + while (idle_cores != 0) { + u32 core_id = Common::CountTrailingZeroes32(idle_cores); + + if (!suggested_queue[core_id].empty()) { + std::array migration_candidates{}; + std::size_t num_candidates = 0; + auto iter = suggested_queue[core_id].begin(); + Thread* suggested = nullptr; + // Step 2: Try selecting a suggested thread. + while (iter != suggested_queue[core_id].end()) { + suggested = *iter; + iter++; + s32 suggested_core_id = suggested->GetProcessorID(); + Thread* top_thread = + suggested_core_id > 0 ? top_threads[suggested_core_id] : nullptr; + if (top_thread != suggested) { + if (top_thread != nullptr && + top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) { + suggested = nullptr; + break; + // There's a too high thread to do core migration, cancel + } + TransferToCore(suggested->GetPriority(), static_cast(core_id), suggested); + break; + } + migration_candidates[num_candidates++] = suggested_core_id; + } + // Step 3: Select a suggested thread from another core + if (suggested == nullptr) { + for (std::size_t i = 0; i < num_candidates; i++) { + s32 candidate_core = migration_candidates[i]; + suggested = top_threads[candidate_core]; + auto it = scheduled_queue[candidate_core].begin(); + it++; + Thread* next = it != scheduled_queue[candidate_core].end() ? *it : nullptr; + if (next != nullptr) { + TransferToCore(suggested->GetPriority(), static_cast(core_id), + suggested); + top_threads[candidate_core] = next; + break; + } + } + } + top_threads[core_id] = suggested; } - TransferToCore(winner->GetPriority(), static_cast(core), winner); - update_thread(winner, sched); - return; + + idle_cores &= ~(1ul << core_id); } - // Step 3: Select a suggested thread from another core - for (auto& src_core : sug_cores) { - auto it = scheduled_queue[src_core].begin(); - it++; - if (it != scheduled_queue[src_core].end()) { - Thread* thread_on_core = scheduled_queue[src_core].front(); - Thread* to_change = *it; - if (thread_on_core->IsRunning() || to_change->IsRunning()) { - UnloadThread(static_cast(src_core)); - } - TransferToCore(thread_on_core->GetPriority(), static_cast(core), thread_on_core); - current_thread = thread_on_core; - break; + u32 cores_needing_context_switch{}; + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + Scheduler& sched = kernel.Scheduler(core); + if (update_thread(top_threads[core], sched)) { + cores_needing_context_switch |= (1ul << core); } } - update_thread(current_thread, sched); + return cores_needing_context_switch; } bool GlobalScheduler::YieldThread(Thread* yielding_thread) { @@ -153,9 +187,6 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { if (winner != nullptr) { if (winner != yielding_thread) { - if (winner->IsRunning()) { - UnloadThread(static_cast(winner->GetProcessorID())); - } TransferToCore(winner->GetPriority(), s32(core_id), winner); } } else { @@ -195,9 +226,6 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread } if (winner != nullptr) { if (winner != yielding_thread) { - if (winner->IsRunning()) { - UnloadThread(static_cast(winner->GetProcessorID())); - } TransferToCore(winner->GetPriority(), static_cast(core_id), winner); } } else { @@ -213,7 +241,9 @@ void GlobalScheduler::PreemptThreads() { const u32 priority = preemption_priorities[core_id]; if (scheduled_queue[core_id].size(priority) > 0) { - scheduled_queue[core_id].front(priority)->IncrementYieldCount(); + if (scheduled_queue[core_id].size(priority) > 1) { + scheduled_queue[core_id].front(priority)->IncrementYieldCount(); + } scheduled_queue[core_id].yield(priority); if (scheduled_queue[core_id].size(priority) > 1) { scheduled_queue[core_id].front(priority)->IncrementYieldCount(); @@ -247,9 +277,6 @@ void GlobalScheduler::PreemptThreads() { } if (winner != nullptr) { - if (winner->IsRunning()) { - UnloadThread(static_cast(winner->GetProcessorID())); - } TransferToCore(winner->GetPriority(), s32(core_id), winner); current_thread = winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; @@ -280,9 +307,6 @@ void GlobalScheduler::PreemptThreads() { } if (winner != nullptr) { - if (winner->IsRunning()) { - UnloadThread(static_cast(winner->GetProcessorID())); - } TransferToCore(winner->GetPriority(), s32(core_id), winner); current_thread = winner; } @@ -292,6 +316,28 @@ void GlobalScheduler::PreemptThreads() { } } +void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule, + Core::EmuThreadHandle global_thread) { + u32 current_core = global_thread.host_handle; + bool must_context_switch = global_thread.guest_handle != InvalidHandle && + (current_core < Core::Hardware::NUM_CPU_CORES); + while (cores_pending_reschedule != 0) { + u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule); + ASSERT(core < Core::Hardware::NUM_CPU_CORES); + if (!must_context_switch || core != current_core) { + auto& phys_core = kernel.PhysicalCore(core); + phys_core.Interrupt(); + } else { + must_context_switch = true; + } + cores_pending_reschedule &= ~(1ul << core); + } + if (must_context_switch) { + auto& core_scheduler = kernel.CurrentScheduler(); + core_scheduler.TryDoContextSwitch(); + } +} + void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) { suggested_queue[core].add(thread, priority); } @@ -349,6 +395,108 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, } } +void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) { + if (old_flags == thread->scheduling_state) { + return; + } + + if (static_cast(old_flags & static_cast(ThreadSchedMasks::LowMask)) == + ThreadSchedStatus::Runnable) { + // In this case the thread was running, now it's pausing/exitting + if (thread->processor_id >= 0) { + Unschedule(thread->current_priority, static_cast(thread->processor_id), thread); + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (core != static_cast(thread->processor_id) && + ((thread->affinity_mask >> core) & 1) != 0) { + Unsuggest(thread->current_priority, core, thread); + } + } + } else if (thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable) { + // The thread is now set to running from being stopped + if (thread->processor_id >= 0) { + Schedule(thread->current_priority, static_cast(thread->processor_id), thread); + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (core != static_cast(thread->processor_id) && + ((thread->affinity_mask >> core) & 1) != 0) { + Suggest(thread->current_priority, core, thread); + } + } + } + + SetReselectionPending(); +} + +void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) { + if (thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) { + return; + } + if (thread->processor_id >= 0) { + Unschedule(old_priority, static_cast(thread->processor_id), thread); + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (core != static_cast(thread->processor_id) && + ((thread->affinity_mask >> core) & 1) != 0) { + Unsuggest(old_priority, core, thread); + } + } + + if (thread->processor_id >= 0) { + // TODO(Blinkhawk): compare it with current thread running on current core, instead of + // checking running + if (thread->IsRunning()) { + SchedulePrepend(thread->current_priority, static_cast(thread->processor_id), + thread); + } else { + Schedule(thread->current_priority, static_cast(thread->processor_id), thread); + } + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (core != static_cast(thread->processor_id) && + ((thread->affinity_mask >> core) & 1) != 0) { + Suggest(thread->current_priority, core, thread); + } + } + thread->IncrementYieldCount(); + SetReselectionPending(); +} + +void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, + s32 old_core) { + if (thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable || + thread->current_priority >= THREADPRIO_COUNT) { + return; + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (((old_affinity_mask >> core) & 1) != 0) { + if (core == static_cast(old_core)) { + Unschedule(thread->current_priority, core, thread); + } else { + Unsuggest(thread->current_priority, core, thread); + } + } + } + + for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { + if (((thread->affinity_mask >> core) & 1) != 0) { + if (core == static_cast(thread->processor_id)) { + Schedule(thread->current_priority, core, thread); + } else { + Suggest(thread->current_priority, core, thread); + } + } + } + + thread->IncrementYieldCount(); + SetReselectionPending(); +} + void GlobalScheduler::Shutdown() { for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { scheduled_queue[core].clear(); @@ -374,13 +522,12 @@ void GlobalScheduler::Unlock() { ASSERT(scope_lock > 0); return; } - for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { - SelectThread(i); - } + u32 cores_pending_reschedule = SelectThreads(); + Core::EmuThreadHandle leaving_thread = current_owner; current_owner = Core::EmuThreadHandle::InvalidHandle(); scope_lock = 1; inner_lock.unlock(); - // TODO(Blinkhawk): Setup the interrupts and change context on current core. + EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread); } Scheduler::Scheduler(Core::System& system, std::size_t core_id) @@ -393,56 +540,83 @@ bool Scheduler::HaveReadyThreads() const { } Thread* Scheduler::GetCurrentThread() const { - return current_thread.get(); + if (current_thread) { + return current_thread.get(); + } + return idle_thread.get(); } Thread* Scheduler::GetSelectedThread() const { return selected_thread.get(); } -void Scheduler::SelectThreads() { - system.GlobalScheduler().SelectThread(core_id); -} - u64 Scheduler::GetLastContextSwitchTicks() const { return last_context_switch_time; } void Scheduler::TryDoContextSwitch() { + auto& phys_core = system.Kernel().CurrentPhysicalCore(); + if (phys_core.IsInterrupted()) { + phys_core.ClearInterrupt(); + } + guard.lock(); if (is_context_switch_pending) { SwitchContext(); + } else { + guard.unlock(); } } -void Scheduler::UnloadThread() { - Thread* const previous_thread = GetCurrentThread(); - Process* const previous_process = system.Kernel().CurrentProcess(); +void Scheduler::OnThreadStart() { + SwitchContextStep2(); +} - UpdateLastContextSwitchTime(previous_thread, previous_process); +void Scheduler::SwitchContextStep2() { + Thread* previous_thread = current_thread.get(); + Thread* new_thread = selected_thread.get(); - // Save context for previous thread - if (previous_thread) { - system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); - system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); - // Save the TPIDR_EL0 system register in case it was modified. - previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); + // Load context of new thread + Process* const previous_process = + previous_thread != nullptr ? previous_thread->GetOwnerProcess() : nullptr; - if (previous_thread->GetStatus() == ThreadStatus::Running) { - // This is only the case when a reschedule is triggered without the current thread - // yielding execution (i.e. an event triggered, system core time-sliced, etc) - previous_thread->SetStatus(ThreadStatus::Ready); + if (new_thread) { + new_thread->context_guard.lock(); + ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id), + "Thread must be assigned to this core."); + ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, + "Thread must be ready to become running."); + + // Cancel any outstanding wakeup events for this thread + current_thread = SharedFrom(new_thread); + new_thread->SetStatus(ThreadStatus::Running); + new_thread->SetIsRunning(true); + + auto* const thread_owner_process = current_thread->GetOwnerProcess(); + if (previous_process != thread_owner_process && thread_owner_process != nullptr) { + system.Kernel().MakeCurrentProcess(thread_owner_process); } - previous_thread->SetIsRunning(false); + if (!new_thread->IsHLEThread()) { + auto& cpu_core = system.ArmInterface(core_id); + cpu_core.LoadContext(new_thread->GetContext32()); + cpu_core.LoadContext(new_thread->GetContext64()); + cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); + cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); + } + } else { + current_thread = nullptr; + // Note: We do not reset the current process and current page table when idling because + // technically we haven't changed processes, our threads are just paused. } - current_thread = nullptr; + guard.unlock(); } void Scheduler::SwitchContext() { - Thread* const previous_thread = GetCurrentThread(); - Thread* const new_thread = GetSelectedThread(); + Thread* previous_thread = current_thread.get(); + Thread* new_thread = selected_thread.get(); is_context_switch_pending = false; if (new_thread == previous_thread) { + guard.unlock(); return; } @@ -452,51 +626,44 @@ void Scheduler::SwitchContext() { // Save context for previous thread if (previous_thread) { - system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); - system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); - // Save the TPIDR_EL0 system register in case it was modified. - previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); + if (!previous_thread->IsHLEThread()) { + auto& cpu_core = system.ArmInterface(core_id); + cpu_core.SaveContext(previous_thread->GetContext32()); + cpu_core.SaveContext(previous_thread->GetContext64()); + // Save the TPIDR_EL0 system register in case it was modified. + previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + } if (previous_thread->GetStatus() == ThreadStatus::Running) { - // This is only the case when a reschedule is triggered without the current thread - // yielding execution (i.e. an event triggered, system core time-sliced, etc) previous_thread->SetStatus(ThreadStatus::Ready); } previous_thread->SetIsRunning(false); + previous_thread->context_guard.unlock(); } - // Load context of new thread - if (new_thread) { - ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id), - "Thread must be assigned to this core."); - ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, - "Thread must be ready to become running."); - - // Cancel any outstanding wakeup events for this thread - new_thread->CancelWakeupTimer(); - current_thread = SharedFrom(new_thread); - new_thread->SetStatus(ThreadStatus::Running); - new_thread->SetIsRunning(true); - - auto* const thread_owner_process = current_thread->GetOwnerProcess(); - if (previous_process != thread_owner_process) { - system.Kernel().MakeCurrentProcess(thread_owner_process); - } + std::shared_ptr old_context; + if (previous_thread != nullptr) { + old_context = previous_thread->GetHostContext(); + } else { + old_context = idle_thread->GetHostContext(); + } - system.ArmInterface(core_id).LoadContext(new_thread->GetContext32()); - system.ArmInterface(core_id).LoadContext(new_thread->GetContext64()); - system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress()); - system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); + std::shared_ptr next_context; + if (new_thread != nullptr) { + next_context = new_thread->GetHostContext(); } else { - current_thread = nullptr; - // Note: We do not reset the current process and current page table when idling because - // technically we haven't changed processes, our threads are just paused. + next_context = idle_thread->GetHostContext(); } + + Common::Fiber::YieldTo(old_context, next_context); + /// When a thread wakes up, the scheduler may have changed to other in another core. + auto& next_scheduler = system.Kernel().CurrentScheduler(); + next_scheduler.SwitchContextStep2(); } void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { const u64 prev_switch_ticks = last_context_switch_time; - const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); + const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks(); const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; if (thread != nullptr) { @@ -510,6 +677,16 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { last_context_switch_time = most_recent_switch_ticks; } +void Scheduler::Initialize() { + std::string name = "Idle Thread Id:" + std::to_string(core_id); + std::function init_func = system.GetCpuManager().GetIdleThreadStartFunc(); + void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); + ThreadType type = static_cast(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE); + auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast(core_id), 0, + nullptr, std::move(init_func), init_func_parameter); + idle_thread = std::move(thread_res).Unwrap(); +} + void Scheduler::Shutdown() { current_thread = nullptr; selected_thread = nullptr; diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 07df33f9c..16655b03f 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "common/multi_level_queue.h" +#include "common/spin_lock.h" #include "core/hardware_properties.h" #include "core/hle/kernel/thread.h" @@ -41,41 +42,17 @@ public: return thread_list; } - /** - * Add a thread to the suggested queue of a cpu core. Suggested threads may be - * picked if no thread is scheduled to run on the core. - */ - void Suggest(u32 priority, std::size_t core, Thread* thread); - - /** - * Remove a thread to the suggested queue of a cpu core. Suggested threads may be - * picked if no thread is scheduled to run on the core. - */ - void Unsuggest(u32 priority, std::size_t core, Thread* thread); - - /** - * Add a thread to the scheduling queue of a cpu core. The thread is added at the - * back the queue in its priority level. - */ - void Schedule(u32 priority, std::size_t core, Thread* thread); + /// Notify the scheduler a thread's status has changed. + void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags); - /** - * Add a thread to the scheduling queue of a cpu core. The thread is added at the - * front the queue in its priority level. - */ - void SchedulePrepend(u32 priority, std::size_t core, Thread* thread); + /// Notify the scheduler a thread's priority has changed. + void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority); - /// Reschedule an already scheduled thread based on a new priority - void Reschedule(u32 priority, std::size_t core, Thread* thread); - - /// Unschedules a thread. - void Unschedule(u32 priority, std::size_t core, Thread* thread); - - /// Selects a core and forces it to unload its current thread's context - void UnloadThread(std::size_t core); + /// Notify the scheduler a thread's core and/or affinity mask has changed. + void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core); /** - * Takes care of selecting the new scheduled thread in three steps: + * Takes care of selecting the new scheduled threads in three steps: * * 1. First a thread is selected from the top of the priority queue. If no thread * is obtained then we move to step two, else we are done. @@ -85,8 +62,10 @@ public: * * 3. Third is no suggested thread is found, we do a second pass and pick a running * thread in another core and swap it with its current thread. + * + * returns the cores needing scheduling. */ - void SelectThread(std::size_t core); + u32 SelectThreads(); bool HaveReadyThreads(std::size_t core_id) const { return !scheduled_queue[core_id].empty(); @@ -149,6 +128,39 @@ private: /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling /// and reschedules current core if needed. void Unlock(); + + void EnableInterruptAndSchedule(u32 cores_pending_reschedule, Core::EmuThreadHandle global_thread); + + /** + * Add a thread to the suggested queue of a cpu core. Suggested threads may be + * picked if no thread is scheduled to run on the core. + */ + void Suggest(u32 priority, std::size_t core, Thread* thread); + + /** + * Remove a thread to the suggested queue of a cpu core. Suggested threads may be + * picked if no thread is scheduled to run on the core. + */ + void Unsuggest(u32 priority, std::size_t core, Thread* thread); + + /** + * Add a thread to the scheduling queue of a cpu core. The thread is added at the + * back the queue in its priority level. + */ + void Schedule(u32 priority, std::size_t core, Thread* thread); + + /** + * Add a thread to the scheduling queue of a cpu core. The thread is added at the + * front the queue in its priority level. + */ + void SchedulePrepend(u32 priority, std::size_t core, Thread* thread); + + /// Reschedule an already scheduled thread based on a new priority + void Reschedule(u32 priority, std::size_t core, Thread* thread); + + /// Unschedules a thread. + void Unschedule(u32 priority, std::size_t core, Thread* thread); + /** * Transfers a thread into an specific core. If the destination_core is -1 * it will be unscheduled from its source code and added into its suggested @@ -174,6 +186,8 @@ private: std::atomic scope_lock{}; Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()}; + Common::SpinLock global_list_guard{}; + /// Lists all thread ids that aren't deleted/etc. std::vector> thread_list; KernelCore& kernel; @@ -190,12 +204,6 @@ public: /// Reschedules to the next available thread (call after current thread is suspended) void TryDoContextSwitch(); - /// Unloads currently running thread - void UnloadThread(); - - /// Select the threads in top of the scheduling multilist. - void SelectThreads(); - /// Gets the current running thread Thread* GetCurrentThread() const; @@ -209,15 +217,22 @@ public: return is_context_switch_pending; } + void Initialize(); + /// Shutdowns the scheduler. void Shutdown(); + void OnThreadStart(); + private: friend class GlobalScheduler; /// Switches the CPU's active thread context to that of the specified thread void SwitchContext(); + /// When a thread wakes up, it must run this through it's new scheduler + void SwitchContextStep2(); + /** * Called on every context switch to update the internal timestamp * This also updates the running time ticks for the given thread and @@ -233,12 +248,15 @@ private: std::shared_ptr current_thread = nullptr; std::shared_ptr selected_thread = nullptr; + std::shared_ptr idle_thread = nullptr; Core::System& system; u64 last_context_switch_time = 0; u64 idle_selection_count = 0; const std::size_t core_id; + Common::SpinLock guard{}; + bool is_context_switch_pending = false; }; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 4ae4529f5..d7f0dcabd 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -863,9 +863,9 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks(); - out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks); + out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { - out_ticks = core_timing.GetTicks() - prev_ctx_ticks; + out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks; } *result = out_ticks; @@ -1428,9 +1428,10 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(ResourceType::Threads, 1)); + ThreadType type = THREADTYPE_USER; CASCADE_RESULT(std::shared_ptr thread, - Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top, - *current_process)); + Thread::Create(system, type, "", entry_point, priority, arg, processor_id, stack_top, + current_process)); const auto new_thread_handle = current_process->GetHandleTable().Create(thread); if (new_thread_handle.Failed()) { @@ -1513,13 +1514,6 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { } else { current_thread->Sleep(nanoseconds); } - - if (is_redundant) { - // If it's redundant, the core is pretty much idle. Some games keep idling - // a core while it's doing nothing, we advance timing to avoid costly continuous - // calls. - system.CoreTiming().AddTicks(2000); - } system.PrepareReschedule(current_thread->GetProcessorID()); } @@ -1725,10 +1719,7 @@ static u64 GetSystemTick(Core::System& system) { auto& core_timing = system.CoreTiming(); // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) - const u64 result{Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks())}; - - // Advance time to defeat dumb games that busy-wait for the frame to end. - core_timing.AddTicks(400); + const u64 result{system.CoreTiming().GetClockTicks()}; return result; } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index db7f379ac..8cb3593db 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -9,12 +9,14 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/fiber.h" #include "common/logging/log.h" #include "common/thread_queue_list.h" #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/cpu_manager.h" #include "core/hardware_properties.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" @@ -23,6 +25,7 @@ #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" #include "core/hle/result.h" #include "core/memory.h" @@ -44,6 +47,7 @@ Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {} Thread::~Thread() = default; void Thread::Stop() { + SchedulerLock lock(kernel); // Cancel any outstanding wakeup events for this thread Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), global_handle); @@ -71,9 +75,8 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // This function might be called from any thread so we have to be cautious and use the // thread-safe version of ScheduleEvent. - const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); Core::System::GetInstance().CoreTiming().ScheduleEvent( - cycles, kernel.ThreadWakeupCallbackEventType(), global_handle); + nanoseconds, kernel.ThreadWakeupCallbackEventType(), global_handle); } void Thread::CancelWakeupTimer() { @@ -125,6 +128,16 @@ void Thread::ResumeFromWait() { SetStatus(ThreadStatus::Ready); } +void Thread::OnWakeUp() { + SchedulerLock lock(kernel); + if (activity == ThreadActivity::Paused) { + SetStatus(ThreadStatus::Paused); + return; + } + + SetStatus(ThreadStatus::Ready); +} + void Thread::CancelWait() { if (GetSchedulingStatus() != ThreadSchedStatus::Paused) { is_sync_cancelled = true; @@ -153,12 +166,29 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, context.fpcr = 0; } -ResultVal> Thread::Create(KernelCore& kernel, std::string name, - VAddr entry_point, u32 priority, u64 arg, - s32 processor_id, VAddr stack_top, - Process& owner_process) { +std::shared_ptr Thread::GetHostContext() const { + return host_context; +} + +ResultVal> Thread::Create(Core::System& system, ThreadType type_flags, + std::string name, VAddr entry_point, u32 priority, + u64 arg, s32 processor_id, VAddr stack_top, + Process* owner_process) { + std::function init_func = system.GetCpuManager().GetGuestThreadStartFunc(); + void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); + return Create(system, type_flags, name, entry_point, priority, arg, processor_id, stack_top, + owner_process, std::move(init_func), init_func_parameter); +} + +ResultVal> Thread::Create(Core::System& system, ThreadType type_flags, + std::string name, VAddr entry_point, u32 priority, + u64 arg, s32 processor_id, VAddr stack_top, + Process* owner_process, + std::function&& thread_start_func, + void* thread_start_parameter) { + auto& kernel = system.Kernel(); // Check if priority is in ranged. Lowest priority -> highest priority id. - if (priority > THREADPRIO_LOWEST) { + if (priority > THREADPRIO_LOWEST && (type_flags & THREADTYPE_IDLE == 0)) { LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority); return ERR_INVALID_THREAD_PRIORITY; } @@ -168,11 +198,12 @@ ResultVal> Thread::Create(KernelCore& kernel, std::strin return ERR_INVALID_PROCESSOR_ID; } - auto& system = Core::System::GetInstance(); - if (!system.Memory().IsValidVirtualAddress(owner_process, entry_point)) { - LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); - // TODO (bunnei): Find the correct error code to use here - return RESULT_UNKNOWN; + if (owner_process) { + if (!system.Memory().IsValidVirtualAddress(*owner_process, entry_point)) { + LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); + // TODO (bunnei): Find the correct error code to use here + return RESULT_UNKNOWN; + } } std::shared_ptr thread = std::make_shared(kernel); @@ -183,7 +214,7 @@ ResultVal> Thread::Create(KernelCore& kernel, std::strin thread->stack_top = stack_top; thread->tpidr_el0 = 0; thread->nominal_priority = thread->current_priority = priority; - thread->last_running_ticks = system.CoreTiming().GetTicks(); + thread->last_running_ticks = 0; thread->processor_id = processor_id; thread->ideal_core = processor_id; thread->affinity_mask = 1ULL << processor_id; @@ -193,16 +224,27 @@ ResultVal> Thread::Create(KernelCore& kernel, std::strin thread->wait_handle = 0; thread->name = std::move(name); thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap(); - thread->owner_process = &owner_process; - auto& scheduler = kernel.GlobalScheduler(); - scheduler.AddThread(thread); - thread->tls_address = thread->owner_process->CreateTLSRegion(); - - thread->owner_process->RegisterThread(thread.get()); - - ResetThreadContext32(thread->context_32, static_cast(stack_top), - static_cast(entry_point), static_cast(arg)); - ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); + thread->owner_process = owner_process; + thread->type = type_flags; + if ((type_flags & THREADTYPE_IDLE) == 0) { + auto& scheduler = kernel.GlobalScheduler(); + scheduler.AddThread(thread); + } + if (owner_process) { + thread->tls_address = thread->owner_process->CreateTLSRegion(); + thread->owner_process->RegisterThread(thread.get()); + } else { + thread->tls_address = 0; + } + // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used + // to initialize the context + if ((type_flags & THREADTYPE_HLE) == 0) { + ResetThreadContext32(thread->context_32, static_cast(stack_top), + static_cast(entry_point), static_cast(arg)); + ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); + } + thread->host_context = + std::make_shared(std::move(thread_start_func), thread_start_parameter); return MakeResult>(std::move(thread)); } @@ -258,7 +300,7 @@ void Thread::SetStatus(ThreadStatus new_status) { } if (status == ThreadStatus::Running) { - last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); + last_running_ticks = Core::System::GetInstance().CoreTiming().GetCPUTicks(); } status = new_status; @@ -375,38 +417,55 @@ void Thread::SetActivity(ThreadActivity value) { } void Thread::Sleep(s64 nanoseconds) { - // Sleep current thread and check for next thread to schedule - SetStatus(ThreadStatus::WaitSleep); + Handle event_handle{}; + { + SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds); + SetStatus(ThreadStatus::WaitSleep); + } - // Create an event to wake the thread up after the specified nanosecond delay has passed - WakeAfterDelay(nanoseconds); + if (event_handle != InvalidHandle) { + auto& time_manager = kernel.TimeManager(); + time_manager.UnscheduleTimeEvent(event_handle); + } } bool Thread::YieldSimple() { - auto& scheduler = kernel.GlobalScheduler(); - return scheduler.YieldThread(this); + bool result{}; + { + SchedulerLock lock(kernel); + result = kernel.GlobalScheduler().YieldThread(this); + } + return result; } bool Thread::YieldAndBalanceLoad() { - auto& scheduler = kernel.GlobalScheduler(); - return scheduler.YieldThreadAndBalanceLoad(this); + bool result{}; + { + SchedulerLock lock(kernel); + result = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this); + } + return result; } bool Thread::YieldAndWaitForLoadBalancing() { - auto& scheduler = kernel.GlobalScheduler(); - return scheduler.YieldThreadAndWaitForLoadBalancing(this); + bool result{}; + { + SchedulerLock lock(kernel); + result = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this); + } + return result; } void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { const u32 old_flags = scheduling_state; scheduling_state = (scheduling_state & static_cast(ThreadSchedMasks::HighMask)) | static_cast(new_status); - AdjustSchedulingOnStatus(old_flags); + kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_flags); } void Thread::SetCurrentPriority(u32 new_priority) { const u32 old_priority = std::exchange(current_priority, new_priority); - AdjustSchedulingOnPriority(old_priority); + kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority); } ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { @@ -443,111 +502,12 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { processor_id = ideal_core; } } - AdjustSchedulingOnAffinity(old_affinity_mask, old_core); + kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core); } } return RESULT_SUCCESS; } -void Thread::AdjustSchedulingOnStatus(u32 old_flags) { - if (old_flags == scheduling_state) { - return; - } - - auto& scheduler = kernel.GlobalScheduler(); - if (static_cast(old_flags & static_cast(ThreadSchedMasks::LowMask)) == - ThreadSchedStatus::Runnable) { - // In this case the thread was running, now it's pausing/exitting - if (processor_id >= 0) { - scheduler.Unschedule(current_priority, static_cast(processor_id), this); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast(processor_id) && ((affinity_mask >> core) & 1) != 0) { - scheduler.Unsuggest(current_priority, core, this); - } - } - } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) { - // The thread is now set to running from being stopped - if (processor_id >= 0) { - scheduler.Schedule(current_priority, static_cast(processor_id), this); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast(processor_id) && ((affinity_mask >> core) & 1) != 0) { - scheduler.Suggest(current_priority, core, this); - } - } - } - - scheduler.SetReselectionPending(); -} - -void Thread::AdjustSchedulingOnPriority(u32 old_priority) { - if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { - return; - } - auto& scheduler = kernel.GlobalScheduler(); - if (processor_id >= 0) { - scheduler.Unschedule(old_priority, static_cast(processor_id), this); - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast(processor_id) && ((affinity_mask >> core) & 1) != 0) { - scheduler.Unsuggest(old_priority, core, this); - } - } - - // Add thread to the new priority queues. - Thread* current_thread = GetCurrentThread(); - - if (processor_id >= 0) { - if (current_thread == this) { - scheduler.SchedulePrepend(current_priority, static_cast(processor_id), this); - } else { - scheduler.Schedule(current_priority, static_cast(processor_id), this); - } - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (core != static_cast(processor_id) && ((affinity_mask >> core) & 1) != 0) { - scheduler.Suggest(current_priority, core, this); - } - } - - scheduler.SetReselectionPending(); -} - -void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = kernel.GlobalScheduler(); - if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || - current_priority >= THREADPRIO_COUNT) { - return; - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (((old_affinity_mask >> core) & 1) != 0) { - if (core == static_cast(old_core)) { - scheduler.Unschedule(current_priority, core, this); - } else { - scheduler.Unsuggest(current_priority, core, this); - } - } - } - - for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { - if (((affinity_mask >> core) & 1) != 0) { - if (core == static_cast(processor_id)) { - scheduler.Schedule(current_priority, core, this); - } else { - scheduler.Suggest(current_priority, core, this); - } - } - } - - scheduler.SetReselectionPending(); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// /** diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 23fdef8a4..33d340b47 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -9,23 +9,42 @@ #include #include "common/common_types.h" +#include "common/spin_lock.h" #include "core/arm/arm_interface.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/synchronization_object.h" #include "core/hle/result.h" +namespace Common { +class Fiber; +} + +namespace Core { +class System; +} + namespace Kernel { +class GlobalScheduler; class KernelCore; class Process; class Scheduler; enum ThreadPriority : u32 { - THREADPRIO_HIGHEST = 0, ///< Highest thread priority - THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps - THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps - THREADPRIO_LOWEST = 63, ///< Lowest thread priority - THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities. + THREADPRIO_HIGHEST = 0, ///< Highest thread priority + THREADPRIO_MAX_CORE_MIGRATION = 2, ///< Highest priority for a core migration + THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps + THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps + THREADPRIO_LOWEST = 63, ///< Lowest thread priority + THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities. +}; + +enum ThreadType : u32 { + THREADTYPE_USER = 0x1, + THREADTYPE_KERNEL = 0x2, + THREADTYPE_HLE = 0x4, + THREADTYPE_IDLE = 0x8, + THREADTYPE_SUSPEND = 0x10, }; enum ThreadProcessorId : s32 { @@ -111,22 +130,43 @@ public: std::function thread, std::shared_ptr object, std::size_t index)>; + /** + * Creates and returns a new thread. The new thread is immediately scheduled + * @param system The instance of the whole system + * @param name The friendly name desired for the thread + * @param entry_point The address at which the thread should start execution + * @param priority The thread's priority + * @param arg User data to pass to the thread + * @param processor_id The ID(s) of the processors on which the thread is desired to be run + * @param stack_top The address of the thread's stack top + * @param owner_process The parent process for the thread, if null, it's a kernel thread + * @return A shared pointer to the newly created thread + */ + static ResultVal> Create(Core::System& system, ThreadType type_flags, std::string name, + VAddr entry_point, u32 priority, u64 arg, + s32 processor_id, VAddr stack_top, + Process* owner_process); + /** * Creates and returns a new thread. The new thread is immediately scheduled - * @param kernel The kernel instance this thread will be created under. + * @param system The instance of the whole system * @param name The friendly name desired for the thread * @param entry_point The address at which the thread should start execution * @param priority The thread's priority * @param arg User data to pass to the thread * @param processor_id The ID(s) of the processors on which the thread is desired to be run * @param stack_top The address of the thread's stack top - * @param owner_process The parent process for the thread + * @param owner_process The parent process for the thread, if null, it's a kernel thread + * @param thread_start_func The function where the host context will start. + * @param thread_start_parameter The parameter which will passed to host context on init * @return A shared pointer to the newly created thread */ - static ResultVal> Create(KernelCore& kernel, std::string name, + static ResultVal> Create(Core::System& system, ThreadType type_flags, std::string name, VAddr entry_point, u32 priority, u64 arg, s32 processor_id, VAddr stack_top, - Process& owner_process); + Process* owner_process, + std::function&& thread_start_func, + void* thread_start_parameter); std::string GetName() const override { return name; @@ -192,7 +232,9 @@ public: } /// Resumes a thread from waiting - void ResumeFromWait(); + void /* deprecated */ ResumeFromWait(); + + void OnWakeUp(); /// Cancels a waiting operation that this thread may or may not be within. /// @@ -206,10 +248,10 @@ public: * Schedules an event to wake up the specified thread after the specified delay * @param nanoseconds The time this thread will be allowed to sleep for */ - void WakeAfterDelay(s64 nanoseconds); + void /* deprecated */ WakeAfterDelay(s64 nanoseconds); /// Cancel any outstanding wakeup events for this thread - void CancelWakeupTimer(); + void /* deprecated */ CancelWakeupTimer(); /** * Sets the result after the thread awakens (from svcWaitSynchronization) @@ -290,6 +332,12 @@ public: return context_64; } + bool IsHLEThread() const { + return (type & THREADTYPE_HLE) != 0; + } + + std::shared_ptr GetHostContext() const; + ThreadStatus GetStatus() const { return status; } @@ -467,16 +515,19 @@ public: } private: + friend class GlobalScheduler; + friend class Scheduler; + void SetSchedulingStatus(ThreadSchedStatus new_status); void SetCurrentPriority(u32 new_priority); ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask); - void AdjustSchedulingOnStatus(u32 old_flags); - void AdjustSchedulingOnPriority(u32 old_priority); void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); ThreadContext32 context_32{}; ThreadContext64 context_64{}; + Common::SpinLock context_guard{}; + std::shared_ptr host_context{}; u64 thread_id = 0; @@ -485,6 +536,8 @@ private: VAddr entry_point = 0; VAddr stack_top = 0; + ThreadType type; + /// Nominal thread priority, as set by the emulated application. /// The nominal priority is the thread priority without priority /// inheritance taken into account. diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp index 21b290468..0b8f0d993 100644 --- a/src/core/hle/kernel/time_manager.cpp +++ b/src/core/hle/kernel/time_manager.cpp @@ -19,7 +19,7 @@ TimeManager::TimeManager(Core::System& system) : system{system} { Handle proper_handle = static_cast(thread_handle); std::shared_ptr thread = this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); - thread->ResumeFromWait(); + thread->OnWakeUp(); }); } diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp index 1f2131ec8..cb35919e9 100644 --- a/src/core/hle/service/hid/controllers/debug_pad.cpp +++ b/src/core/hle/service/hid/controllers/debug_pad.cpp @@ -23,7 +23,7 @@ void Controller_DebugPad::OnRelease() {} void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp index 6e990dd00..b7b7bfeae 100644 --- a/src/core/hle/service/hid/controllers/gesture.cpp +++ b/src/core/hle/service/hid/controllers/gesture.cpp @@ -19,7 +19,7 @@ void Controller_Gesture::OnRelease() {} void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp index 9a8d354ba..feae89525 100644 --- a/src/core/hle/service/hid/controllers/keyboard.cpp +++ b/src/core/hle/service/hid/controllers/keyboard.cpp @@ -21,7 +21,7 @@ void Controller_Keyboard::OnRelease() {} void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp index 93d88ea50..ac40989c5 100644 --- a/src/core/hle/service/hid/controllers/mouse.cpp +++ b/src/core/hle/service/hid/controllers/mouse.cpp @@ -19,7 +19,7 @@ void Controller_Mouse::OnRelease() {} void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 6fbee7efa..ef67ad690 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -328,7 +328,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* const auto& last_entry = main_controller->npad[main_controller->common.last_entry_index]; - main_controller->common.timestamp = core_timing.GetTicks(); + main_controller->common.timestamp = core_timing.GetCPUTicks(); main_controller->common.last_entry_index = (main_controller->common.last_entry_index + 1) % 17; diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp index 9e527d176..e7483bfa2 100644 --- a/src/core/hle/service/hid/controllers/stubbed.cpp +++ b/src/core/hle/service/hid/controllers/stubbed.cpp @@ -23,7 +23,7 @@ void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u } CommonHeader header{}; - header.timestamp = core_timing.GetTicks(); + header.timestamp = core_timing.GetCPUTicks(); header.total_entry_count = 17; header.entry_count = 0; header.last_entry_index = 0; diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp index 1c6e55566..e326f8f5c 100644 --- a/src/core/hle/service/hid/controllers/touchscreen.cpp +++ b/src/core/hle/service/hid/controllers/touchscreen.cpp @@ -22,7 +22,7 @@ void Controller_Touchscreen::OnRelease() {} void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { - shared_memory.header.timestamp = core_timing.GetTicks(); + shared_memory.header.timestamp = core_timing.GetCPUTicks(); shared_memory.header.total_entry_count = 17; if (!IsControllerActivated()) { @@ -49,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timin touch_entry.diameter_x = Settings::values.touchscreen.diameter_x; touch_entry.diameter_y = Settings::values.touchscreen.diameter_y; touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; - const u64 tick = core_timing.GetTicks(); + const u64 tick = core_timing.GetCPUTicks(); touch_entry.delta_time = tick - last_touch; last_touch = tick; touch_entry.finger = Settings::values.touchscreen.finger; diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp index 27511b27b..2503ef241 100644 --- a/src/core/hle/service/hid/controllers/xpad.cpp +++ b/src/core/hle/service/hid/controllers/xpad.cpp @@ -20,7 +20,7 @@ void Controller_XPad::OnRelease() {} void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) { for (auto& xpad_entry : shared_memory.shared_memory_entries) { - xpad_entry.header.timestamp = core_timing.GetTicks(); + xpad_entry.header.timestamp = core_timing.GetCPUTicks(); xpad_entry.header.total_entry_count = 17; if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 57d5edea7..e9020e0dc 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -39,11 +39,9 @@ namespace Service::HID { // Updating period for each HID device. // TODO(ogniK): Find actual polling rate of hid -constexpr s64 pad_update_ticks = static_cast(Core::Hardware::BASE_CLOCK_RATE / 66); -[[maybe_unused]] constexpr s64 accelerometer_update_ticks = - static_cast(Core::Hardware::BASE_CLOCK_RATE / 100); -[[maybe_unused]] constexpr s64 gyroscope_update_ticks = - static_cast(Core::Hardware::BASE_CLOCK_RATE / 100); +constexpr s64 pad_update_ticks = static_cast(1000000000 / 66); +[[maybe_unused]] constexpr s64 accelerometer_update_ticks = static_cast(1000000000 / 100); +[[maybe_unused]] constexpr s64 gyroscope_update_ticks = static_cast(1000000000 / 100); constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; IAppletResource::IAppletResource(Core::System& system) @@ -78,8 +76,8 @@ IAppletResource::IAppletResource(Core::System& system) // Register update callbacks pad_update_event = - Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) { - UpdateControllers(userdata, cycles_late); + Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 ns_late) { + UpdateControllers(userdata, ns_late); }); // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) @@ -109,7 +107,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { rb.PushCopyObjects(shared_mem); } -void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) { +void IAppletResource::UpdateControllers(u64 userdata, s64 ns_late) { auto& core_timing = system.CoreTiming(); const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); @@ -120,7 +118,7 @@ void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) { controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE); } - core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); + core_timing.ScheduleEvent(pad_update_ticks - ns_late, pad_update_event); } class IActiveVibrationDeviceList final : public ServiceFramework { diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index 36ed6f7da..e82fd031b 100644 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 5}; rb.Push(RESULT_SUCCESS); - rb.PushRaw(system.CoreTiming().GetTicks()); + rb.PushRaw(system.CoreTiming().GetCPUTicks()); rb.PushRaw(0); } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 0d913334e..fba89e7a6 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -200,8 +200,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector& input, std::vector& o IoctlGetGpuTime params{}; std::memcpy(¶ms, input.data(), input.size()); - const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks()); - params.gpu_time = static_cast(ns.count()); + params.gpu_time = static_cast(system.CoreTiming().GetGlobalTimeNs().count()); std::memcpy(output.data(), ¶ms, output.size()); return 0; } diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 437bc5dee..aaf28995d 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -27,8 +27,8 @@ namespace Service::NVFlinger { -constexpr s64 frame_ticks = static_cast(Core::Hardware::BASE_CLOCK_RATE / 60); -constexpr s64 frame_ticks_30fps = static_cast(Core::Hardware::BASE_CLOCK_RATE / 30); +constexpr s64 frame_ticks = static_cast(1000000000 / 60); +constexpr s64 frame_ticks_30fps = static_cast(1000000000 / 30); NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(0, "Default", system); @@ -39,11 +39,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { // Schedule the screen composition events composition_event = - Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) { + Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 ns_late) { Compose(); - const auto ticks = - Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks(); - this->system.CoreTiming().ScheduleEvent(std::max(0LL, ticks - cycles_late), + const auto ticks = GetNextTicks(); + this->system.CoreTiming().ScheduleEvent(std::max(0LL, ticks - ns_late), composition_event); }); @@ -223,7 +222,7 @@ void NVFlinger::Compose() { s64 NVFlinger::GetNextTicks() const { constexpr s64 max_hertz = 120LL; - return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; + return (1000000000 * (1LL << swap_interval)) / max_hertz; } } // namespace Service::NVFlinger diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index 1575f0b49..59a272f4a 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp @@ -11,9 +11,8 @@ namespace Service::Time::Clock { TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { - const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( - Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Hardware::CNTFREQ)}; + const TimeSpanType ticks_time_span{ + TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index 44d5bc651..8baaa2a6a 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp @@ -11,9 +11,8 @@ namespace Service::Time::Clock { SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { - const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( - Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Hardware::CNTFREQ)}; + const TimeSpanType ticks_time_span{ + TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; return {ticks_time_span.ToSeconds(), GetClockSourceId()}; } diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 67f1bbcf3..4cf58a61a 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -234,9 +234,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)}; if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { - const auto ticks{Clock::TimeSpanType::FromTicks( - Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Hardware::CNTFREQ)}; + const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), + Core::Hardware::CNTFREQ)}; const s64 base_time_point{context.offset + current_time_point.time_point - ticks.ToSeconds()}; IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index 999ec1e51..e0ae9f874 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -30,8 +30,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system, const Common::UUID& clock_source_id, Clock::TimeSpanType current_time_point) { const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( - Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), - Core::Hardware::CNTFREQ)}; + system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; const Clock::SteadyClockContext context{ static_cast(current_time_point.nanoseconds - ticks_time_span.nanoseconds), clock_source_id}; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9d87045a0..66634596d 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -29,15 +29,12 @@ namespace Core::Memory { struct Memory::Impl { explicit Impl(Core::System& system_) : system{system_} {} - void SetCurrentPageTable(Kernel::Process& process) { + void SetCurrentPageTable(Kernel::Process& process, u32 core_id) { current_page_table = &process.PageTable().PageTableImpl(); const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); - system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width); - system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width); - system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width); - system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); + system.ArmInterface(core_id).PageTableChanged(*current_page_table, address_space_width); } void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { @@ -689,8 +686,8 @@ struct Memory::Impl { Memory::Memory(Core::System& system) : impl{std::make_unique(system)} {} Memory::~Memory() = default; -void Memory::SetCurrentPageTable(Kernel::Process& process) { - impl->SetCurrentPageTable(process); +void Memory::SetCurrentPageTable(Kernel::Process& process, u32 core_id) { + impl->SetCurrentPageTable(process, core_id); } void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { diff --git a/src/core/memory.h b/src/core/memory.h index 9292f3b0a..93f0c1d6c 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -64,7 +64,7 @@ public: * * @param process The process to use the page table of. */ - void SetCurrentPageTable(Kernel::Process& process); + void SetCurrentPageTable(Kernel::Process& process, u32 core_id); /** * Maps an allocated buffer onto a region of the emulated process address space. diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index b139e8465..53d27859b 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -20,7 +20,7 @@ namespace Core::Memory { -constexpr s64 CHEAT_ENGINE_TICKS = static_cast(Core::Hardware::BASE_CLOCK_RATE / 12); +constexpr s64 CHEAT_ENGINE_TICKS = static_cast(1000000000 / 12); constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata) @@ -190,7 +190,7 @@ CheatEngine::~CheatEngine() { void CheatEngine::Initialize() { event = Core::Timing::CreateEvent( "CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id), - [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); }); + [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); }); core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event); metadata.process_id = system.CurrentProcess()->GetProcessID(); @@ -217,7 +217,7 @@ void CheatEngine::Reload(std::vector cheats) { MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70)); -void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) { +void CheatEngine::FrameCallback(u64 userdata, s64 ns_late) { if (is_pending_reload.exchange(false)) { vm.LoadProgram(cheats); } @@ -230,7 +230,7 @@ void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) { vm.Execute(metadata); - core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event); + core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - ns_late, event); } } // namespace Core::Memory diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp index b2c6c537e..8b0c50d11 100644 --- a/src/core/tools/freezer.cpp +++ b/src/core/tools/freezer.cpp @@ -14,7 +14,7 @@ namespace Tools { namespace { -constexpr s64 MEMORY_FREEZER_TICKS = static_cast(Core::Hardware::BASE_CLOCK_RATE / 60); +constexpr s64 MEMORY_FREEZER_TICKS = static_cast(1000000000 / 60); u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) { switch (width) { @@ -57,7 +57,7 @@ Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& m : core_timing{core_timing_}, memory{memory_} { event = Core::Timing::CreateEvent( "MemoryFreezer::FrameCallback", - [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); }); + [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); }); core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event); } @@ -158,7 +158,7 @@ std::vector Freezer::GetEntries() const { return entries; } -void Freezer::FrameCallback(u64 userdata, s64 cycles_late) { +void Freezer::FrameCallback(u64 userdata, s64 ns_late) { if (!IsActive()) { LOG_DEBUG(Common_Memory, "Memory freezer has been deactivated, ending callback events."); return; @@ -173,7 +173,7 @@ void Freezer::FrameCallback(u64 userdata, s64 cycles_late) { MemoryWriteWidth(memory, entry.width, entry.address, entry.value); } - core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - cycles_late, event); + core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - ns_late, event); } void Freezer::FillEntryReads() { diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 3f750b51c..47ef30aa9 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -8,7 +8,6 @@ add_executable(tests core/arm/arm_test_common.cpp core/arm/arm_test_common.h core/core_timing.cpp - core/host_timing.cpp tests.cpp ) diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index ff2d11cc8..795f3da09 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -16,31 +16,30 @@ namespace { // Numbers are chosen randomly to make sure the correct one is given. -constexpr std::array CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; -constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals +static constexpr std::array CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; +static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals +static constexpr std::array calls_order{{2, 0, 1, 4, 3}}; +static std::array delays{}; std::bitset callbacks_ran_flags; u64 expected_callback = 0; s64 lateness = 0; template -void CallbackTemplate(u64 userdata, s64 cycles_late) { +void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) { static_assert(IDX < CB_IDS.size(), "IDX out of range"); callbacks_ran_flags.set(IDX); REQUIRE(CB_IDS[IDX] == userdata); - REQUIRE(CB_IDS[IDX] == expected_callback); - REQUIRE(lateness == cycles_late); + REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]); + delays[IDX] = nanoseconds_late; + ++expected_callback; } u64 callbacks_done = 0; -void EmptyCallback(u64 userdata, s64 cycles_late) { - ++callbacks_done; -} - struct ScopeInit final { ScopeInit() { - core_timing.Initialize(); + core_timing.Initialize([]() {}); } ~ScopeInit() { core_timing.Shutdown(); @@ -49,110 +48,97 @@ struct ScopeInit final { Core::Timing::CoreTiming core_timing; }; -void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0, - int expected_lateness = 0, int cpu_downcount = 0) { - callbacks_ran_flags = 0; - expected_callback = CB_IDS[idx]; - lateness = expected_lateness; - - // Pretend we executed X cycles of instructions. - core_timing.SwitchContext(context); - core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount); - core_timing.Advance(); - core_timing.SwitchContext((context + 1) % 4); - - REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); -} -} // Anonymous namespace - TEST_CASE("CoreTiming[BasicOrder]", "[core]") { ScopeInit guard; auto& core_timing = guard.core_timing; + std::vector> events{ + Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>), + Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>), + Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>), + Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>), + Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>), + }; + + expected_callback = 0; + + core_timing.SyncPause(true); + + u64 one_micro = 1000U; + for (std::size_t i = 0; i < events.size(); i++) { + u64 order = calls_order[i]; + core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); + } + /// test pause + REQUIRE(callbacks_ran_flags.none()); - std::shared_ptr cb_a = - Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>); - std::shared_ptr cb_b = - Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>); - std::shared_ptr cb_c = - Core::Timing::CreateEvent("callbackC", CallbackTemplate<2>); - std::shared_ptr cb_d = - Core::Timing::CreateEvent("callbackD", CallbackTemplate<3>); - std::shared_ptr cb_e = - Core::Timing::CreateEvent("callbackE", CallbackTemplate<4>); - - // Enter slice 0 - core_timing.ResetRun(); - - // D -> B -> C -> A -> E - core_timing.SwitchContext(0); - core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); - REQUIRE(1000 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]); - REQUIRE(500 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]); - REQUIRE(500 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]); - REQUIRE(100 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]); - REQUIRE(100 == core_timing.GetDowncount()); - - AdvanceAndCheck(core_timing, 3, 0); - AdvanceAndCheck(core_timing, 1, 1); - AdvanceAndCheck(core_timing, 2, 2); - AdvanceAndCheck(core_timing, 0, 3); - AdvanceAndCheck(core_timing, 4, 0); -} - -TEST_CASE("CoreTiming[FairSharing]", "[core]") { + core_timing.Pause(false); // No need to sync - ScopeInit guard; - auto& core_timing = guard.core_timing; + while (core_timing.HasPendingEvents()) + ; - std::shared_ptr empty_callback = - Core::Timing::CreateEvent("empty_callback", EmptyCallback); + REQUIRE(callbacks_ran_flags.all()); - callbacks_done = 0; - u64 MAX_CALLBACKS = 10; - for (std::size_t i = 0; i < 10; i++) { - core_timing.ScheduleEvent(i * 3333U, empty_callback, 0); + for (std::size_t i = 0; i < delays.size(); i++) { + const double delay = static_cast(delays[i]); + const double micro = delay / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili); } +} - const s64 advances = MAX_SLICE_LENGTH / 10; - core_timing.ResetRun(); - u64 current_time = core_timing.GetTicks(); - bool keep_running{}; - do { - keep_running = false; - for (u32 active_core = 0; active_core < 4; ++active_core) { - core_timing.SwitchContext(active_core); - if (core_timing.CanCurrentContextRun()) { - core_timing.AddTicks(std::min(advances, core_timing.GetDowncount())); - core_timing.Advance(); - } - keep_running |= core_timing.CanCurrentContextRun(); - } - } while (keep_running); - u64 current_time_2 = core_timing.GetTicks(); - - REQUIRE(MAX_CALLBACKS == callbacks_done); - REQUIRE(current_time_2 == current_time + MAX_SLICE_LENGTH * 4); +#pragma optimize("", off) +u64 TestTimerSpeed(Core::Timing::CoreTiming& core_timing) { + u64 start = core_timing.GetGlobalTimeNs().count(); + u64 placebo = 0; + for (std::size_t i = 0; i < 1000; i++) { + placebo += core_timing.GetGlobalTimeNs().count(); + } + u64 end = core_timing.GetGlobalTimeNs().count(); + return (end - start); } +#pragma optimize("", on) -TEST_CASE("Core::Timing[PredictableLateness]", "[core]") { +TEST_CASE("CoreTiming[BasicOrderNoPausing]", "[core]") { ScopeInit guard; auto& core_timing = guard.core_timing; + std::vector> events{ + Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>), + Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>), + Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>), + Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>), + Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>), + }; + + core_timing.SyncPause(true); + core_timing.SyncPause(false); + + expected_callback = 0; + + u64 start = core_timing.GetGlobalTimeNs().count(); + u64 one_micro = 1000U; + for (std::size_t i = 0; i < events.size(); i++) { + u64 order = calls_order[i]; + core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); + } + u64 end = core_timing.GetGlobalTimeNs().count(); + const double scheduling_time = static_cast(end - start); + const double timer_time = static_cast(TestTimerSpeed(core_timing)); - std::shared_ptr cb_a = - Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>); - std::shared_ptr cb_b = - Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>); + while (core_timing.HasPendingEvents()) + ; - // Enter slice 0 - core_timing.ResetRun(); + REQUIRE(callbacks_ran_flags.all()); - core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]); - core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]); + for (std::size_t i = 0; i < delays.size(); i++) { + const double delay = static_cast(delays[i]); + const double micro = delay / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili); + } - AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10) - AdvanceAndCheck(core_timing, 1, 1, 50, -50); + const double micro = scheduling_time / 1000.0f; + const double mili = micro / 1000.0f; + printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili); + printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, + timer_time / 1000000.f); } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8eb017f65..482e49711 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" @@ -154,8 +156,7 @@ u64 GPU::GetTicks() const { constexpr u64 gpu_ticks_num = 384; constexpr u64 gpu_ticks_den = 625; - const u64 cpu_ticks = system.CoreTiming().GetTicks(); - u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); if (Settings::values.use_fast_gpu_time) { nanoseconds /= 256; } diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index bfeb16458..9ceb6c8d7 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -52,6 +52,8 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); + Core::System::GetInstance().RegisterHostThread(); + Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { emit LoadProgress(stage, value, total); @@ -65,28 +67,30 @@ void EmuThread::run() { bool was_active = false; while (!stop_run) { if (running) { - if (!was_active) + if (was_active) { emit DebugModeLeft(); + } - Core::System::ResultStatus result = Core::System::GetInstance().RunLoop(); + running_guard = true; + Core::System::ResultStatus result = Core::System::GetInstance().Run(); if (result != Core::System::ResultStatus::Success) { + running_guard = false; this->SetRunning(false); emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails()); } + running_wait.Wait(); + result = Core::System::GetInstance().Pause(); + if (result != Core::System::ResultStatus::Success) { + running_guard = false; + this->SetRunning(false); + emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails()); + } + running_guard = false; - was_active = running || exec_step; - if (!was_active && !stop_run) - emit DebugModeEntered(); - } else if (exec_step) { - if (!was_active) - emit DebugModeLeft(); - - exec_step = false; - Core::System::GetInstance().SingleStep(); + was_active = true; emit DebugModeEntered(); - yieldCurrentThread(); - - was_active = false; + } else if (exec_step) { + UNIMPLEMENTED(); } else { std::unique_lock lock{running_mutex}; running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; }); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 3626604ca..768568b3e 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -59,6 +59,11 @@ public: this->running = running; lock.unlock(); running_cv.notify_all(); + if (!running) { + running_wait.Set(); + /// Wait until effectively paused + while (running_guard); + } } /** @@ -84,6 +89,8 @@ private: std::atomic_bool stop_run{false}; std::mutex running_mutex; std::condition_variable running_cv; + Common::Event running_wait{}; + std::atomic_bool running_guard{false}; signals: /** diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index c1ea25fb8..765908c5a 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -59,8 +59,10 @@ std::vector> WaitTreeItem::MakeThreadItemList() std::size_t row = 0; auto add_threads = [&](const std::vector>& threads) { for (std::size_t i = 0; i < threads.size(); ++i) { - item_list.push_back(std::make_unique(*threads[i])); - item_list.back()->row = row; + if (!threads[i]->IsHLEThread()) { + item_list.push_back(std::make_unique(*threads[i])); + item_list.back()->row = row; + } ++row; } }; diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 4d2ea7e9e..1e5377840 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -237,7 +237,7 @@ int main(int argc, char** argv) { std::thread render_thread([&emu_window] { emu_window->Present(); }); while (emu_window->IsOpen()) { - system.RunLoop(); + //system.RunLoop(); } render_thread.join(); diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index 676e70ebd..1a45506d4 100644 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp @@ -256,7 +256,7 @@ int main(int argc, char** argv) { system.Renderer().Rasterizer().LoadDiskResources(); while (!finished) { - system.RunLoop(); + //system.RunLoop(); } detached_tasks.WaitForAllTasks(); -- cgit v1.2.3 From 18dcb0934217628711c5b1d22fd6d7635e683e3f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Feb 2020 12:28:55 -0400 Subject: HostTiming: Pause the hardware clock on pause. --- src/common/wall_clock.cpp | 4 ++++ src/common/wall_clock.h | 2 ++ src/common/x64/native_clock.cpp | 7 +++++++ src/common/x64/native_clock.h | 2 ++ src/core/core.cpp | 2 +- src/core/core_timing.cpp | 6 ++++++ src/core/core_timing.h | 1 + 7 files changed, 23 insertions(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index d4d35f4e7..a46db6bbf 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -53,6 +53,10 @@ public: return Common::Divide128On32(temporary, 1000000000).first; } + void Pause(bool is_paused) override { + // Do nothing in this clock type. + } + private: base_time_point start_time; }; diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index ed284cf50..367d72134 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -28,6 +28,8 @@ public: /// Returns current wall time in emulated cpu cycles virtual u64 GetCPUCycles() = 0; + virtual void Pause(bool is_paused) = 0; + /// Tells if the wall clock, uses the host CPU's hardware clock bool IsNative() const { return is_native; diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 26d4d0ba6..926f92ff8 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -65,6 +65,13 @@ u64 NativeClock::GetRTSC() { return accumulated_ticks; } +void NativeClock::Pause(bool is_paused) { + if (!is_paused) { + _mm_mfence(); + last_measure = __rdtsc(); + } +} + std::chrono::nanoseconds NativeClock::GetTimeNS() { const u64 rtsc_value = GetRTSC(); return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index b58cf9f5a..3851f8fc2 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -26,6 +26,8 @@ public: u64 GetCPUCycles() override; + void Pause(bool is_paused) override; + private: u64 GetRTSC(); diff --git a/src/core/core.cpp b/src/core/core.cpp index e8936b09d..1d6179a80 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -137,8 +137,8 @@ struct System::Impl { ResultStatus Pause() { status = ResultStatus::Success; - kernel.Suspend(true); core_timing.SyncPause(true); + kernel.Suspend(true); cpu_manager.Pause(true); return status; diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index cc32a853b..5a7abcfca 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -77,6 +77,9 @@ void CoreTiming::SyncPause(bool is_paused) { return; } Pause(is_paused); + if (!is_paused) { + pause_event.Set(); + } event.Set(); while (paused_set != is_paused) ; @@ -197,6 +200,9 @@ void CoreTiming::ThreadLoop() { wait_set = false; } paused_set = true; + clock->Pause(true); + pause_event.Wait(); + clock->Pause(false); } } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 707c8ef0c..c70b605c8 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -136,6 +136,7 @@ private: std::shared_ptr ev_lost; Common::Event event{}; + Common::Event pause_event{}; Common::SpinLock basic_lock{}; Common::SpinLock advance_lock{}; std::unique_ptr timer_thread; -- cgit v1.2.3 From cd1c38be8d15d3caf52f566a9e8dc20504c61068 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 7 Mar 2020 18:59:42 -0400 Subject: ARM/Memory: Correct Exclusive Monitor and Implement Exclusive Memory Writes. --- src/common/CMakeLists.txt | 2 + src/common/atomic_ops.cpp | 70 ++++++++++++++++++++++ src/common/atomic_ops.h | 17 ++++++ src/core/arm/dynarmic/arm_dynarmic_64.cpp | 66 +++++++++++++++++---- src/core/arm/dynarmic/arm_dynarmic_64.h | 6 +- src/core/arm/exclusive_monitor.h | 6 +- src/core/hle/kernel/address_arbiter.cpp | 6 +- src/core/hle/kernel/mutex.cpp | 5 +- src/core/hle/kernel/svc.cpp | 2 +- src/core/hle/kernel/thread.cpp | 6 +- src/core/memory.cpp | 98 +++++++++++++++++++++++++++++++ src/core/memory.h | 65 ++++++++++++++++++++ 12 files changed, 325 insertions(+), 24 deletions(-) create mode 100644 src/common/atomic_ops.cpp create mode 100644 src/common/atomic_ops.h (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 3cc17d0e9..d120c8d3d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -98,6 +98,8 @@ add_library(common STATIC algorithm.h alignment.h assert.h + atomic_ops.cpp + atomic_ops.h detached_tasks.cpp detached_tasks.h bit_field.h diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp new file mode 100644 index 000000000..65cdfb4fd --- /dev/null +++ b/src/common/atomic_ops.cpp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/atomic_ops.h" + +#if _MSC_VER +#include +#endif + +namespace Common { + +#if _MSC_VER + +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) { + u8 result = _InterlockedCompareExchange8((char*)pointer, value, expected); + return result == expected; +} + +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) { + u16 result = _InterlockedCompareExchange16((short*)pointer, value, expected); + return result == expected; +} + +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) { + u32 result = _InterlockedCompareExchange((long*)pointer, value, expected); + return result == expected; +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { + u64 result = _InterlockedCompareExchange64((__int64*)pointer, value, expected); + return result == expected; +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { + return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0], (__int64*)expected.data()) != 0; +} + + +#else + +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) { + return __sync_bool_compare_and_swap (pointer, value, expected); +} + +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) { + return __sync_bool_compare_and_swap (pointer, value, expected); +} + +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) { + return __sync_bool_compare_and_swap (pointer, value, expected); +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { + return __sync_bool_compare_and_swap (pointer, value, expected); +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { + unsigned __int128 value_a; + unsigned __int128 expected_a; + std::memcpy(&value_a, value.data(), sizeof(u128)); + std::memcpy(&expected_a, expected.data(), sizeof(u128)); + return __sync_bool_compare_and_swap ((unsigned __int128*)pointer, value_a, expected_a); +} + +#endif + +} // namespace Common diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h new file mode 100644 index 000000000..22cb3a402 --- /dev/null +++ b/src/common/atomic_ops.h @@ -0,0 +1,17 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Common { + +bool AtomicCompareAndSwap(u8 volatile * pointer, u8 value, u8 expected); +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected); +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected); +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected); +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected); + +} // namespace Common diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 5e316ffd4..a22c22bf0 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -66,6 +66,22 @@ public: memory.Write64(vaddr + 8, value[1]); } + bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override { + return parent.system.Memory().WriteExclusive8(vaddr, value, expected); + } + bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override { + return parent.system.Memory().WriteExclusive16(vaddr, value, expected); + } + bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override { + return parent.system.Memory().WriteExclusive32(vaddr, value, expected); + } + bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override { + return parent.system.Memory().WriteExclusive64(vaddr, value, expected); + } + bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override { + return parent.system.Memory().WriteExclusive128(vaddr, value, expected); + } + void InterpreterFallback(u64 pc, std::size_t num_instructions) override { LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc, num_instructions, MemoryReadCode(pc)); @@ -284,9 +300,29 @@ DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std:: DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; -void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) { - // Size doesn't actually matter. - monitor.Mark(core_index, addr, 16); +void DynarmicExclusiveMonitor::SetExclusive8(std::size_t core_index, VAddr addr) { + monitor.Mark(core_index, addr, 1, [&]() -> u8 { return memory.Read8(addr); }); +} + +void DynarmicExclusiveMonitor::SetExclusive16(std::size_t core_index, VAddr addr) { + monitor.Mark(core_index, addr, 2, [&]() -> u16 { return memory.Read16(addr); }); +} + +void DynarmicExclusiveMonitor::SetExclusive32(std::size_t core_index, VAddr addr) { + monitor.Mark(core_index, addr, 4, [&]() -> u32 { return memory.Read32(addr); }); +} + +void DynarmicExclusiveMonitor::SetExclusive64(std::size_t core_index, VAddr addr) { + monitor.Mark(core_index, addr, 8, [&]() -> u64 { return memory.Read64(addr); }); +} + +void DynarmicExclusiveMonitor::SetExclusive128(std::size_t core_index, VAddr addr) { + monitor.Mark(core_index, addr, 16, [&]() -> u128 { + u128 result; + result[0] = memory.Read64(addr); + result[1] = memory.Read64(addr + 8); + return result; + }); } void DynarmicExclusiveMonitor::ClearExclusive() { @@ -294,28 +330,32 @@ void DynarmicExclusiveMonitor::ClearExclusive() { } bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 1, [&] { memory.Write8(vaddr, value); }); + return monitor.DoExclusiveOperation(core_index, vaddr, 1, [&](u8 expected) -> bool { + return memory.WriteExclusive8(vaddr, value, expected); + }); } bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 2, - [&] { memory.Write16(vaddr, value); }); + return monitor.DoExclusiveOperation(core_index, vaddr, 2, [&](u16 expected) -> bool { + return memory.WriteExclusive16(vaddr, value, expected); + }); } bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 4, - [&] { memory.Write32(vaddr, value); }); + return monitor.DoExclusiveOperation(core_index, vaddr, 4, [&](u32 expected) -> bool { + return memory.WriteExclusive32(vaddr, value, expected); + }); } bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 8, - [&] { memory.Write64(vaddr, value); }); + return monitor.DoExclusiveOperation(core_index, vaddr, 8, [&](u64 expected) -> bool { + return memory.WriteExclusive64(vaddr, value, expected); + }); } bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) { - return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] { - memory.Write64(vaddr + 0, value[0]); - memory.Write64(vaddr + 8, value[1]); + return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&](u128 expected) -> bool { + return memory.WriteExclusive128(vaddr, value, expected); }); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 9e94b58c2..3ead59f16 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -82,7 +82,11 @@ public: explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count); ~DynarmicExclusiveMonitor() override; - void SetExclusive(std::size_t core_index, VAddr addr) override; + void SetExclusive8(std::size_t core_index, VAddr addr) override; + void SetExclusive16(std::size_t core_index, VAddr addr) override; + void SetExclusive32(std::size_t core_index, VAddr addr) override; + void SetExclusive64(std::size_t core_index, VAddr addr) override; + void SetExclusive128(std::size_t core_index, VAddr addr) override; void ClearExclusive() override; bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index ccd73b80f..2ee312eee 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -18,7 +18,11 @@ class ExclusiveMonitor { public: virtual ~ExclusiveMonitor(); - virtual void SetExclusive(std::size_t core_index, VAddr addr) = 0; + virtual void SetExclusive8(std::size_t core_index, VAddr addr) = 0; + virtual void SetExclusive16(std::size_t core_index, VAddr addr) = 0; + virtual void SetExclusive32(std::size_t core_index, VAddr addr) = 0; + virtual void SetExclusive64(std::size_t core_index, VAddr addr) = 0; + virtual void SetExclusive128(std::size_t core_index, VAddr addr) = 0; virtual void ClearExclusive() = 0; virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0; diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index ebabde921..07acabc1d 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -90,7 +90,7 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 auto& monitor = system.Monitor(); u32 current_value; do { - monitor.SetExclusive(current_core, address); + monitor.SetExclusive32(current_core, address); current_value = memory.Read32(address); if (current_value != value) { @@ -120,7 +120,7 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a auto& monitor = system.Monitor(); s32 updated_value; do { - monitor.SetExclusive(current_core, address); + monitor.SetExclusive32(current_core, address); updated_value = memory.Read32(address); if (updated_value != value) { @@ -191,7 +191,7 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6 const std::size_t current_core = system.CurrentCoreIndex(); auto& monitor = system.Monitor(); do { - monitor.SetExclusive(current_core, address); + monitor.SetExclusive32(current_core, address); current_value = static_cast(memory.Read32(address)); if (should_decrement) { decrement_value = current_value - 1; diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index ebe3f6050..16c95782a 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -10,6 +10,7 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/arm/exclusive_monitor.h" +#include "core/core.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" @@ -138,7 +139,7 @@ std::pair> Mutex::Unlock(std::shared_ptr> Mutex::Unlock(std::shared_ptrResumeFromWait(); do { - monitor.SetExclusive(current_core, address); + monitor.SetExclusive32(current_core, address); } while (!monitor.ExclusiveWrite32(current_core, address, mutex_value)); return {RESULT_SUCCESS, new_owner}; } diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index da2f90a1d..371beed0d 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1641,7 +1641,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_ u32 update_val = 0; const VAddr mutex_address = thread->GetMutexWaitAddress(); do { - monitor.SetExclusive(current_core, mutex_address); + monitor.SetExclusive32(current_core, mutex_address); // If the mutex is not yet acquired, acquire it. mutex_val = memory.Read32(mutex_address); diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index b99e3b7a5..51cc5dcca 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -236,7 +236,7 @@ ResultVal> Thread::Create(Core::System& system, ThreadTy ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); } thread->host_context = - std::make_shared(std::move(thread_start_func), thread_start_parameter); + std::make_shared(std::move(thread_start_func), thread_start_parameter); return MakeResult>(std::move(thread)); } @@ -412,12 +412,12 @@ ResultCode Thread::SetActivity(ThreadActivity value) { } if (value == ThreadActivity::Paused) { - if (pausing_state & static_cast(ThreadSchedFlags::ThreadPauseFlag) != 0) { + if ((pausing_state & static_cast(ThreadSchedFlags::ThreadPauseFlag)) != 0) { return ERR_INVALID_STATE; } AddSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag); } else { - if (pausing_state & static_cast(ThreadSchedFlags::ThreadPauseFlag) == 0) { + if ((pausing_state & static_cast(ThreadSchedFlags::ThreadPauseFlag)) == 0) { return ERR_INVALID_STATE; } RemoveSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 66634596d..4cb5d05e5 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -8,6 +8,7 @@ #include #include "common/assert.h" +#include "common/atomic_ops.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/page_table.h" @@ -176,6 +177,22 @@ struct Memory::Impl { } } + bool WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) { + return WriteExclusive(addr, data, expected); + } + + bool WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) { + return WriteExclusive(addr, data, expected); + } + + bool WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) { + return WriteExclusive(addr, data, expected); + } + + bool WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) { + return WriteExclusive(addr, data, expected); + } + std::string ReadCString(VAddr vaddr, std::size_t max_length) { std::string string; string.reserve(max_length); @@ -679,6 +696,67 @@ struct Memory::Impl { } } + template + bool WriteExclusive(const VAddr vaddr, const T data, const T expected) { + u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; + if (page_pointer != nullptr) { + // NOTE: Avoid adding any extra logic to this fast-path block + T volatile* pointer = reinterpret_cast(&page_pointer[vaddr]); + return Common::AtomicCompareAndSwap(pointer, data, expected); + } + + const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; + switch (type) { + case Common::PageType::Unmapped: + LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, + static_cast(data), vaddr); + return true; + case Common::PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); + break; + case Common::PageType::RasterizerCachedMemory: { + u8* host_ptr{GetPointerFromVMA(vaddr)}; + system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T)); + T volatile* pointer = reinterpret_cast(&host_ptr); + return Common::AtomicCompareAndSwap(pointer, data, expected); + break; + } + default: + UNREACHABLE(); + } + return true; + } + + bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) { + u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; + if (page_pointer != nullptr) { + // NOTE: Avoid adding any extra logic to this fast-path block + u64 volatile* pointer = reinterpret_cast(&page_pointer[vaddr]); + return Common::AtomicCompareAndSwap(pointer, data, expected); + } + + const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; + switch (type) { + case Common::PageType::Unmapped: + LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8, + static_cast(data[1]), static_cast(data[0]), vaddr); + return true; + case Common::PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); + break; + case Common::PageType::RasterizerCachedMemory: { + u8* host_ptr{GetPointerFromVMA(vaddr)}; + system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(u128)); + u64 volatile* pointer = reinterpret_cast(&host_ptr); + return Common::AtomicCompareAndSwap(pointer, data, expected); + break; + } + default: + UNREACHABLE(); + } + return true; + } + Common::PageTable* current_page_table = nullptr; Core::System& system; }; @@ -761,6 +839,26 @@ void Memory::Write64(VAddr addr, u64 data) { impl->Write64(addr, data); } +bool Memory::WriteExclusive8(VAddr addr, u8 data, u8 expected) { + return impl->WriteExclusive8(addr, data, expected); +} + +bool Memory::WriteExclusive16(VAddr addr, u16 data, u16 expected) { + return impl->WriteExclusive16(addr, data, expected); +} + +bool Memory::WriteExclusive32(VAddr addr, u32 data, u32 expected) { + return impl->WriteExclusive32(addr, data, expected); +} + +bool Memory::WriteExclusive64(VAddr addr, u64 data, u64 expected) { + return impl->WriteExclusive64(addr, data, expected); +} + +bool Memory::WriteExclusive128(VAddr addr, u128 data, u128 expected) { + return impl->WriteExclusive128(addr, data, expected); +} + std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) { return impl->ReadCString(vaddr, max_length); } diff --git a/src/core/memory.h b/src/core/memory.h index 93f0c1d6c..4a1cc63f4 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -244,6 +244,71 @@ public: */ void Write64(VAddr addr, u64 data); + /** + * Writes a 8-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 8-bit unsigned integer to. + * @param data The 8-bit unsigned integer to write to the given virtual address. + * @param expected The 8-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive8(VAddr addr, u8 data, u8 expected); + + /** + * Writes a 16-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 16-bit unsigned integer to. + * @param data The 16-bit unsigned integer to write to the given virtual address. + * @param expected The 16-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive16(VAddr addr, u16 data, u16 expected); + + /** + * Writes a 32-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 32-bit unsigned integer to. + * @param data The 32-bit unsigned integer to write to the given virtual address. + * @param expected The 32-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive32(VAddr addr, u32 data, u32 expected); + + /** + * Writes a 64-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 64-bit unsigned integer to. + * @param data The 64-bit unsigned integer to write to the given virtual address. + * @param expected The 64-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive64(VAddr addr, u64 data, u64 expected); + + /** + * Writes a 128-bit unsigned integer to the given virtual address in + * the current process' address space if and only if the address contains + * the expected value. This operation is atomic. + * + * @param addr The virtual address to write the 128-bit unsigned integer to. + * @param data The 128-bit unsigned integer to write to the given virtual address. + * @param expected The 128-bit unsigned integer to check against the given virtual address. + * + * @post The memory range [addr, sizeof(data)) contains the given data value. + */ + bool WriteExclusive128(VAddr addr, u128 data, u128 expected); + /** * Reads a null-terminated string from the given virtual address. * This function will continually read characters until either: -- cgit v1.2.3 From 534466754f381e90f5f6475a0c02031242a5c256 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 21 Mar 2020 12:23:13 -0400 Subject: X64 Clock: Reduce accuracy to be less or equal to guest accuracy. --- src/common/x64/native_clock.cpp | 3 ++- src/common/x64/native_clock.h | 5 +++++ src/core/arm/dynarmic/arm_dynarmic_64.cpp | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 926f92ff8..f1bc60fd2 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -62,7 +62,8 @@ u64 NativeClock::GetRTSC() { } accumulated_ticks += diff; rtsc_serialize.unlock(); - return accumulated_ticks; + /// The clock cannot be more precise than the guest timer, remove the lower bits + return accumulated_ticks & inaccuracy_mask; } void NativeClock::Pause(bool is_paused) { diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 3851f8fc2..e853094d2 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -31,6 +31,11 @@ public: private: u64 GetRTSC(); + /// value used to reduce the native clocks accuracy as some apss rely on + /// undefined behavior where the level of accuracy in the clock shouldn't + /// be higher. + static constexpr u64 inaccuracy_mask = ~(0x100 - 1); + SpinLock rtsc_serialize{}; u64 last_measure{}; u64 accumulated_ticks{}; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 547a6e07e..3f18dede2 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -184,6 +184,9 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable& config.enable_fast_dispatch = false; } + // CNTPCT uses wall clock. + config.wall_clock_cntpct = true; + return std::make_shared(config); } -- cgit v1.2.3 From 528b19a84287167d7699465e495b196d216b99db Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 5 Apr 2020 09:48:53 -0400 Subject: General: Tune the priority of main emulation threads so they have higher priority than less important helper threads. --- src/common/thread.cpp | 46 +++++++++++++++++++++++++ src/common/thread.h | 9 +++++ src/core/core_timing.cpp | 1 + src/core/cpu_manager.cpp | 1 + src/video_core/gpu_thread.cpp | 1 + src/video_core/renderer_vulkan/vk_scheduler.cpp | 2 ++ 6 files changed, 60 insertions(+) (limited to 'src/common') diff --git a/src/common/thread.cpp b/src/common/thread.cpp index c9684aed9..33c8437f5 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -25,6 +25,52 @@ namespace Common { +#ifdef _WIN32 + +void SetCurrentThreadPriority(ThreadPriority new_priority) { + auto handle = GetCurrentThread(); + int windows_priority = 0; + switch (new_priority) { + case ThreadPriority::Low: + windows_priority = THREAD_PRIORITY_BELOW_NORMAL; + break; + case ThreadPriority::Normal: + windows_priority = THREAD_PRIORITY_NORMAL; + break; + case ThreadPriority::High: + windows_priority = THREAD_PRIORITY_ABOVE_NORMAL; + break; + case ThreadPriority::VeryHigh: + windows_priority = THREAD_PRIORITY_HIGHEST; + break; + default: + windows_priority = THREAD_PRIORITY_NORMAL; + break; + } + SetThreadPriority(handle, windows_priority); +} + +#else + +void SetCurrentThreadPriority(ThreadPriority new_priority) { + pthread_t this_thread = pthread_self(); + + s32 max_prio = sched_get_priority_max(SCHED_OTHER); + s32 min_prio = sched_get_priority_min(SCHED_OTHER); + u32 level = static_cast(new_priority) + 1; + + struct sched_param params; + if (max_prio > min_prio) { + params.sched_priority = min_prio + ((max_prio - min_prio) * level) / 4; + } else { + params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4; + } + + pthread_setschedparam(this_thread, SCHED_OTHER, ¶ms); +} + +#endif + #ifdef _MSC_VER // Sets the debugger-visible name of the current thread. diff --git a/src/common/thread.h b/src/common/thread.h index 127cc7e23..52b359413 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -86,6 +86,15 @@ private: std::size_t generation = 0; // Incremented once each time the barrier is used }; +enum class ThreadPriority : u32 { + Low = 0, + Normal = 1, + High = 2, + VeryHigh = 3, +}; + +void SetCurrentThreadPriority(ThreadPriority new_priority); + void SetCurrentThreadName(const char* name); } // namespace Common diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index b02119494..032b29e33 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -48,6 +48,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) { std::string name = "yuzu:HostTiming"; MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); instance.on_thread_init(); instance.ThreadLoop(); } diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 63c578852..32afcf3ae 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -337,6 +337,7 @@ void CpuManager::RunThread(std::size_t core) { } MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); auto& data = core_data[core]; data.enter_barrier = std::make_unique(); data.exit_barrier = std::make_unique(); diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 323185bfc..738c6f0c1 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -22,6 +22,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); system.RegisterHostThread(); // Wait for first GPU command before acquiring the window context diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 82ec9180e..56524e6f3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -9,6 +9,7 @@ #include #include "common/microprofile.h" +#include "common/thread.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" @@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { } void VKScheduler::WorkerThread() { + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); -- cgit v1.2.3 From 467d43570e10b98fa33067352d35fe62ceb3cb9e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 8 May 2020 18:53:13 -0400 Subject: Clang Format. --- src/common/atomic_ops.cpp | 14 +++++++------- src/common/atomic_ops.h | 2 +- src/common/thread.cpp | 30 +++++++++++++++--------------- src/core/arm/unicorn/arm_unicorn.cpp | 4 ++-- src/core/arm/unicorn/arm_unicorn.h | 4 ++-- src/core/core.h | 2 -- src/core/hle/kernel/mutex.cpp | 11 +++++------ src/core/hle/kernel/mutex.h | 3 ++- src/core/hle/kernel/physical_core.h | 7 +++---- src/core/hle/kernel/server_session.cpp | 2 +- src/yuzu/bootmanager.cpp | 1 - src/yuzu/bootmanager.h | 3 ++- src/yuzu/debugger/wait_tree.cpp | 5 +++-- src/yuzu/main.cpp | 9 ++++++--- 14 files changed, 49 insertions(+), 48 deletions(-) (limited to 'src/common') diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp index 65cdfb4fd..6b2236114 100644 --- a/src/common/atomic_ops.cpp +++ b/src/common/atomic_ops.cpp @@ -35,26 +35,26 @@ bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { } bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { - return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0], (__int64*)expected.data()) != 0; + return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0], + (__int64*)expected.data()) != 0; } - #else bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) { - return __sync_bool_compare_and_swap (pointer, value, expected); + return __sync_bool_compare_and_swap(pointer, value, expected); } bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) { - return __sync_bool_compare_and_swap (pointer, value, expected); + return __sync_bool_compare_and_swap(pointer, value, expected); } bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) { - return __sync_bool_compare_and_swap (pointer, value, expected); + return __sync_bool_compare_and_swap(pointer, value, expected); } bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { - return __sync_bool_compare_and_swap (pointer, value, expected); + return __sync_bool_compare_and_swap(pointer, value, expected); } bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { @@ -62,7 +62,7 @@ bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { unsigned __int128 expected_a; std::memcpy(&value_a, value.data(), sizeof(u128)); std::memcpy(&expected_a, expected.data(), sizeof(u128)); - return __sync_bool_compare_and_swap ((unsigned __int128*)pointer, value_a, expected_a); + return __sync_bool_compare_and_swap((unsigned __int128*)pointer, value_a, expected_a); } #endif diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h index 22cb3a402..e6181d521 100644 --- a/src/common/atomic_ops.h +++ b/src/common/atomic_ops.h @@ -8,7 +8,7 @@ namespace Common { -bool AtomicCompareAndSwap(u8 volatile * pointer, u8 value, u8 expected); +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected); bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected); bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected); bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected); diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 33c8437f5..8e5935e6a 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -31,21 +31,21 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { auto handle = GetCurrentThread(); int windows_priority = 0; switch (new_priority) { - case ThreadPriority::Low: - windows_priority = THREAD_PRIORITY_BELOW_NORMAL; - break; - case ThreadPriority::Normal: - windows_priority = THREAD_PRIORITY_NORMAL; - break; - case ThreadPriority::High: - windows_priority = THREAD_PRIORITY_ABOVE_NORMAL; - break; - case ThreadPriority::VeryHigh: - windows_priority = THREAD_PRIORITY_HIGHEST; - break; - default: - windows_priority = THREAD_PRIORITY_NORMAL; - break; + case ThreadPriority::Low: + windows_priority = THREAD_PRIORITY_BELOW_NORMAL; + break; + case ThreadPriority::Normal: + windows_priority = THREAD_PRIORITY_NORMAL; + break; + case ThreadPriority::High: + windows_priority = THREAD_PRIORITY_ABOVE_NORMAL; + break; + case ThreadPriority::VeryHigh: + windows_priority = THREAD_PRIORITY_HIGHEST; + break; + default: + windows_priority = THREAD_PRIORITY_NORMAL; + break; } SetThreadPriority(handle, windows_priority); } diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 9f9690454..35e8f42e8 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -63,8 +63,8 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si return false; } -ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, - bool uses_wall_clock, Arch architecture, std::size_t core_index) +ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, + Arch architecture, std::size_t core_index) : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, core_index{core_index} { const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64; CHECKED(uc_open(arch, UC_MODE_ARM, &uc)); diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 9b7d7f6c2..8ace8b86f 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -20,8 +20,8 @@ public: AArch64, // 64-bit ARM }; - explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, - bool uses_wall_clock, Arch architecture, std::size_t core_index); + explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, + Arch architecture, std::size_t core_index); ~ARM_Unicorn() override; void SetPC(u64 pc) override; diff --git a/src/core/core.h b/src/core/core.h index 87df79d57..d2d1fcc5b 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -148,8 +148,6 @@ public: */ ResultStatus Pause(); - - /** * Step the CPU one instruction * @return Result status, indicating whether or not the operation succeeded. diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 32dc1ffae..8f6c944d1 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -9,7 +9,6 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/core.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" @@ -126,11 +125,11 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle, std::pair> Mutex::Unlock(std::shared_ptr owner, VAddr address) { - // The mutex address must be 4-byte aligned - if ((address % sizeof(u32)) != 0) { - LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address); - return {ERR_INVALID_ADDRESS, nullptr}; - } + // The mutex address must be 4-byte aligned + if ((address % sizeof(u32)) != 0) { + LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address); + return {ERR_INVALID_ADDRESS, nullptr}; + } auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address); if (new_owner == nullptr) { diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h index bce06ecea..3b81dc3df 100644 --- a/src/core/hle/kernel/mutex.h +++ b/src/core/hle/kernel/mutex.h @@ -29,7 +29,8 @@ public: Handle requesting_thread_handle); /// Unlocks a mutex for owner at address - std::pair> Unlock(std::shared_ptr owner, VAddr address); + std::pair> Unlock(std::shared_ptr owner, + VAddr address); /// Releases the mutex at the specified address. ResultCode Release(VAddr address); diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 751b994a7..85f6dec05 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -10,7 +10,7 @@ #include "core/arm/cpu_interrupt_handler.h" namespace Common { - class SpinLock; +class SpinLock; } namespace Kernel { @@ -27,9 +27,8 @@ namespace Kernel { class PhysicalCore { public: - PhysicalCore(Core::System& system, std::size_t id, - Kernel::Scheduler& scheduler, - Core::CPUInterruptHandler& interrupt_handler); + PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler, + Core::CPUInterruptHandler& interrupt_handler); ~PhysicalCore(); PhysicalCore(const PhysicalCore&) = delete; diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index e988a3f22..7b23a6889 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -17,9 +17,9 @@ #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/server_session.h" #include "core/hle/kernel/session.h" -#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/memory.h" diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 5f93bd432..4bfce48a4 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -71,7 +71,6 @@ void EmuThread::run() { gpu.ReleaseContext(); - // Holds whether the cpu was running during the last iteration, // so that the DebugModeLeft signal can be emitted before the // next execution step diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 768568b3e..6c59b4d5c 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -62,7 +62,8 @@ public: if (!running) { running_wait.Set(); /// Wait until effectively paused - while (running_guard); + while (running_guard) + ; } } diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 0226ae2e2..9bb0a0109 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -127,11 +127,12 @@ std::vector> WaitTreeCallstack::GetChildren() cons return list; } - auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(), thread.GetContext64()); + auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(), + thread.GetContext64()); for (auto& entry : backtrace) { std::string s = fmt::format("{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address, - entry.original_address, entry.offset, entry.name); + entry.original_address, entry.offset, entry.name); list.push_back(std::make_unique(QString::fromStdString(s))); } diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index f1d9ec326..53790c89c 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -534,7 +534,8 @@ void GMainWindow::InitializeWidgets() { if (emulation_running) { return; } - bool is_async = !Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core; + bool is_async = + !Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core; Settings::values.use_asynchronous_gpu_emulation = is_async; async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); Settings::Apply(); @@ -552,7 +553,8 @@ void GMainWindow::InitializeWidgets() { return; } Settings::values.use_multi_core = !Settings::values.use_multi_core; - bool is_async = Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core; + bool is_async = + Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core; Settings::values.use_asynchronous_gpu_emulation = is_async; async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); multicore_status_button->setChecked(Settings::values.use_multi_core); @@ -1958,7 +1960,8 @@ void GMainWindow::OnConfigure() { dock_status_button->setChecked(Settings::values.use_docked_mode); multicore_status_button->setChecked(Settings::values.use_multi_core); - Settings::values.use_asynchronous_gpu_emulation = Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core; + Settings::values.use_asynchronous_gpu_emulation = + Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core; async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); #ifdef HAS_VULKAN -- cgit v1.2.3 From 7b1804dab40fabfbf73157cd5ce76793de1375ee Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 14 May 2020 14:44:03 -0400 Subject: Common/AtomicOps: Correct GCC Intrinsic argument ordering. --- src/common/atomic_ops.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/common') diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp index 6b2236114..1098e21ff 100644 --- a/src/common/atomic_ops.cpp +++ b/src/common/atomic_ops.cpp @@ -42,19 +42,19 @@ bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { #else bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) { - return __sync_bool_compare_and_swap(pointer, value, expected); + return __sync_bool_compare_and_swap(pointer, expected, value); } bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) { - return __sync_bool_compare_and_swap(pointer, value, expected); + return __sync_bool_compare_and_swap(pointer, expected, value); } bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) { - return __sync_bool_compare_and_swap(pointer, value, expected); + return __sync_bool_compare_and_swap(pointer, expected, value); } bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { - return __sync_bool_compare_and_swap(pointer, value, expected); + return __sync_bool_compare_and_swap(pointer, expected, value); } bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { @@ -62,7 +62,7 @@ bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { unsigned __int128 expected_a; std::memcpy(&value_a, value.data(), sizeof(u128)); std::memcpy(&expected_a, expected.data(), sizeof(u128)); - return __sync_bool_compare_and_swap((unsigned __int128*)pointer, value_a, expected_a); + return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); } #endif -- cgit v1.2.3 From 31651523968312433ebd267a74e86689107a7023 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 18 May 2020 13:08:53 -0400 Subject: Common/NativeClockx86: Reduce native clock accuracy further. --- src/common/x64/native_clock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index e853094d2..891a3bbfd 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -34,7 +34,7 @@ private: /// value used to reduce the native clocks accuracy as some apss rely on /// undefined behavior where the level of accuracy in the clock shouldn't /// be higher. - static constexpr u64 inaccuracy_mask = ~(0x100 - 1); + static constexpr u64 inaccuracy_mask = ~(0x400 - 1); SpinLock rtsc_serialize{}; u64 last_measure{}; -- cgit v1.2.3 From 7fd7d05838b88e9dd63a7329e29ea355669a5f18 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 May 2020 17:37:37 -0400 Subject: Common/Kernel: Corrections and small bug fixing. --- src/common/wall_clock.cpp | 7 +------ src/core/hle/kernel/svc.cpp | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'src/common') diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index a46db6bbf..3afbdb898 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -68,12 +68,7 @@ std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, const auto& caps = GetCPUCaps(); u64 rtsc_frequency = 0; if (caps.invariant_tsc) { - if (caps.base_frequency != 0) { - rtsc_frequency = static_cast(caps.base_frequency) * 1000000U; - } - if (rtsc_frequency == 0) { - rtsc_frequency = EstimateRDTSCFrequency(); - } + rtsc_frequency = EstimateRDTSCFrequency(); } if (rtsc_frequency == 0) { return std::make_unique(emulated_cpu_frequency, diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 781032cd1..013ae9e34 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -344,9 +344,9 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { SchedulerLock lock(system.Kernel()); auto* sync_object = thread->GetHLESyncObject(); sync_object->RemoveWaitingThread(SharedFrom(thread)); - - thread->InvokeHLECallback(SharedFrom(thread)); } + + thread->InvokeHLECallback(SharedFrom(thread)); } return thread->GetSignalingResult(); -- cgit v1.2.3 From 2f8947583f2f0af4058600243d6c1d244e3c4890 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 27 Jun 2020 18:20:06 -0400 Subject: Core/Common: Address Feedback. --- src/common/fiber.cpp | 10 +++------- src/common/spin_lock.cpp | 6 +++--- src/common/spin_lock.h | 5 +++++ src/common/x64/native_clock.cpp | 4 ++-- src/core/arm/arm_interface.h | 2 +- src/core/arm/cpu_interrupt_handler.h | 2 +- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 5 +++-- src/core/arm/dynarmic/arm_dynarmic_32.h | 2 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 5 +++-- src/core/arm/dynarmic/arm_dynarmic_64.h | 2 +- src/core/arm/unicorn/arm_unicorn.cpp | 2 +- src/core/arm/unicorn/arm_unicorn.h | 2 +- src/core/core.cpp | 2 +- src/core/core.h | 4 ++-- src/core/core_timing.cpp | 28 ++++++++++++---------------- src/core/hle/kernel/kernel.cpp | 14 +++++--------- src/core/hle/kernel/physical_core.cpp | 4 ++++ src/core/hle/kernel/physical_core.h | 7 ++----- src/core/hle/kernel/scheduler.cpp | 4 ++-- src/core/hle/kernel/scheduler.h | 4 ++++ src/tests/common/fibers.cpp | 2 +- 21 files changed, 58 insertions(+), 58 deletions(-) (limited to 'src/common') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index f97ad433b..1c1d09ccb 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -54,9 +54,7 @@ Fiber::Fiber(std::function&& entry_point_func, void* start_paramete impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this); } -Fiber::Fiber() { - impl = std::make_unique(); -} +Fiber::Fiber() : impl{std::make_unique()} {} Fiber::~Fiber() { if (released) { @@ -116,8 +114,8 @@ std::shared_ptr Fiber::ThreadToFiber() { struct Fiber::FiberImpl { alignas(64) std::array stack; - u8* stack_limit; alignas(64) std::array rewind_stack; + u8* stack_limit; u8* rewind_stack_limit; boost::context::detail::fcontext_t context; boost::context::detail::fcontext_t rewind_context; @@ -168,9 +166,7 @@ void Fiber::SetRewindPoint(std::function&& rewind_func, void* start rewind_parameter = start_parameter; } -Fiber::Fiber() { - impl = std::make_unique(); -} +Fiber::Fiber() : impl{std::make_unique()} {} Fiber::~Fiber() { if (released) { diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp index c7b46aac6..c1524220f 100644 --- a/src/common/spin_lock.cpp +++ b/src/common/spin_lock.cpp @@ -20,7 +20,7 @@ namespace { -void thread_pause() { +void ThreadPause() { #if __x86_64__ _mm_pause(); #elif __aarch64__ && _MSC_VER @@ -30,13 +30,13 @@ void thread_pause() { #endif } -} // namespace +} // Anonymous namespace namespace Common { void SpinLock::lock() { while (lck.test_and_set(std::memory_order_acquire)) { - thread_pause(); + ThreadPause(); } } diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h index 70282a961..1df5528c4 100644 --- a/src/common/spin_lock.h +++ b/src/common/spin_lock.h @@ -8,6 +8,11 @@ namespace Common { +/** + * SpinLock class + * a lock similar to mutex that forces a thread to spin wait instead calling the + * supervisor. Should be used on short sequences of code. + */ class SpinLock { public: void lock(); diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index f1bc60fd2..424b39b1f 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #ifdef _MSC_VER @@ -52,7 +53,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequenc } u64 NativeClock::GetRTSC() { - rtsc_serialize.lock(); + std::scoped_lock scope{rtsc_serialize}; _mm_mfence(); const u64 current_measure = __rdtsc(); u64 diff = current_measure - last_measure; @@ -61,7 +62,6 @@ u64 NativeClock::GetRTSC() { last_measure = current_measure; } accumulated_ticks += diff; - rtsc_serialize.unlock(); /// The clock cannot be more precise than the guest timer, remove the lower bits return accumulated_ticks & inaccuracy_mask; } diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 0c1d6ac39..1f24051e4 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -148,7 +148,7 @@ public: */ virtual void SetTPIDR_EL0(u64 value) = 0; - virtual void ChangeProcessorId(std::size_t new_core_id) = 0; + virtual void ChangeProcessorID(std::size_t new_core_id) = 0; virtual void SaveContext(ThreadContext32& ctx) = 0; virtual void SaveContext(ThreadContext64& ctx) = 0; diff --git a/src/core/arm/cpu_interrupt_handler.h b/src/core/arm/cpu_interrupt_handler.h index 91c31a271..3d062d326 100644 --- a/src/core/arm/cpu_interrupt_handler.h +++ b/src/core/arm/cpu_interrupt_handler.h @@ -23,7 +23,7 @@ public: CPUInterruptHandler(CPUInterruptHandler&&) = default; CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default; - constexpr bool IsInterrupted() const { + bool IsInterrupted() const { return is_interrupted; } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index cfda12098..0d4ab95b7 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -107,7 +107,7 @@ public: u64 GetTicksRemaining() override { if (parent.uses_wall_clock) { if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { - return 1000U; + return minimum_run_cycles; } return 0U; } @@ -116,6 +116,7 @@ public: ARM_Dynarmic_32& parent; std::size_t num_interpreted_instructions{}; + static constexpr u64 minimum_run_cycles = 1000U; }; std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, @@ -214,7 +215,7 @@ void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { cp15->uprw = static_cast(value); } -void ARM_Dynarmic_32::ChangeProcessorId(std::size_t new_core_id) { +void ARM_Dynarmic_32::ChangeProcessorID(std::size_t new_core_id) { jit->ChangeProcessorID(new_core_id); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index d9c0bfede..2bab31b92 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -47,7 +47,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void ChangeProcessorId(std::size_t new_core_id) override; + void ChangeProcessorID(std::size_t new_core_id) override; void SaveContext(ThreadContext32& ctx) override; void SaveContext(ThreadContext64& ctx) override {} diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 35a99e28a..790981034 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -144,7 +144,7 @@ public: u64 GetTicksRemaining() override { if (parent.uses_wall_clock) { if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { - return 1000U; + return minimum_run_cycles; } return 0U; } @@ -159,6 +159,7 @@ public: std::size_t num_interpreted_instructions = 0; u64 tpidrro_el0 = 0; u64 tpidr_el0 = 0; + static constexpr u64 minimum_run_cycles = 1000U; }; std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, @@ -271,7 +272,7 @@ void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) { cb->tpidr_el0 = value; } -void ARM_Dynarmic_64::ChangeProcessorId(std::size_t new_core_id) { +void ARM_Dynarmic_64::ChangeProcessorID(std::size_t new_core_id) { jit->ChangeProcessorID(new_core_id); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index c74fcbcea..403c55961 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -45,7 +45,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void ChangeProcessorId(std::size_t new_core_id) override; + void ChangeProcessorID(std::size_t new_core_id) override; void SaveContext(ThreadContext32& ctx) override {} void SaveContext(ThreadContext64& ctx) override; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 35e8f42e8..1df3f3ed1 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -159,7 +159,7 @@ void ARM_Unicorn::SetTPIDR_EL0(u64 value) { CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDR_EL0, &value)); } -void ARM_Unicorn::ChangeProcessorId(std::size_t new_core_id) { +void ARM_Unicorn::ChangeProcessorID(std::size_t new_core_id) { core_index = new_core_id; } diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 8ace8b86f..810aff311 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -36,7 +36,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void ChangeProcessorId(std::size_t new_core_id) override; + void ChangeProcessorID(std::size_t new_core_id) override; void PrepareReschedule() override; void ClearExclusiveState() override; void ExecuteInstructions(std::size_t num_instructions); diff --git a/src/core/core.cpp b/src/core/core.cpp index 8256ec0fc..1a243c515 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -443,7 +443,7 @@ bool System::IsPoweredOn() const { } void System::PrepareReschedule() { - // impl->CurrentPhysicalCore().Stop(); + // Deprecated, does nothing, kept for backward compatibility. } void System::PrepareReschedule(const u32 core_index) { diff --git a/src/core/core.h b/src/core/core.h index 133ecb8e1..5c6cfbffe 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -138,13 +138,13 @@ public: /** * Run the OS and Application - * This function will start emulation and run the competent devices + * This function will start emulation and run the relevant devices */ ResultStatus Run(); /** * Pause the OS and Application - * This function will pause emulation and stop the competent devices + * This function will pause emulation and stop the relevant devices */ ResultStatus Pause(); diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 1aa89a1cc..5c83c41a4 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -45,9 +45,9 @@ CoreTiming::CoreTiming() { CoreTiming::~CoreTiming() = default; void CoreTiming::ThreadEntry(CoreTiming& instance) { - std::string name = "yuzu:HostTiming"; - MicroProfileOnThreadCreate(name.c_str()); - Common::SetCurrentThreadName(name.c_str()); + constexpr char name[] = "yuzu:HostTiming"; + MicroProfileOnThreadCreate(name); + Common::SetCurrentThreadName(name); Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); instance.on_thread_init(); instance.ThreadLoop(); @@ -108,18 +108,19 @@ bool CoreTiming::HasPendingEvents() const { void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr& event_type, u64 userdata) { - basic_lock.lock(); - const u64 timeout = static_cast(GetGlobalTimeNs().count() + ns_into_future); + { + std::scoped_lock scope{basic_lock}; + const u64 timeout = static_cast(GetGlobalTimeNs().count() + ns_into_future); - event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); + event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); - std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); - basic_lock.unlock(); + std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + } event.Set(); } void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u64 userdata) { - basic_lock.lock(); + std::scoped_lock scope{basic_lock}; const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get() && e.userdata == userdata; }); @@ -129,7 +130,6 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u event_queue.erase(itr, event_queue.end()); std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); } - basic_lock.unlock(); } void CoreTiming::AddTicks(u64 ticks) { @@ -187,8 +187,8 @@ void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { } std::optional CoreTiming::Advance() { - advance_lock.lock(); - basic_lock.lock(); + std::scoped_lock advance_scope{advance_lock}; + std::scoped_lock basic_scope{basic_lock}; global_timer = GetGlobalTimeNs().count(); while (!event_queue.empty() && event_queue.front().time <= global_timer) { @@ -207,12 +207,8 @@ std::optional CoreTiming::Advance() { if (!event_queue.empty()) { const s64 next_time = event_queue.front().time - global_timer; - basic_lock.unlock(); - advance_lock.unlock(); return next_time; } else { - basic_lock.unlock(); - advance_lock.unlock(); return std::nullopt; } } diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index dbb75416d..1f2af7a1b 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -472,16 +472,12 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const { } void KernelCore::InvalidateAllInstructionCaches() { - if (!IsMulticore()) { - auto& threads = GlobalScheduler().GetThreadList(); - for (auto& thread : threads) { - if (!thread->IsHLEThread()) { - auto& arm_interface = thread->ArmInterface(); - arm_interface.ClearInstructionCache(); - } + auto& threads = GlobalScheduler().GetThreadList(); + for (auto& thread : threads) { + if (!thread->IsHLEThread()) { + auto& arm_interface = thread->ArmInterface(); + arm_interface.ClearInstructionCache(); } - } else { - UNIMPLEMENTED_MSG("Cache Invalidation unimplemented for multicore"); } } diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index c82c60a16..c6bbdb080 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -37,6 +37,10 @@ void PhysicalCore::Shutdown() { scheduler.Shutdown(); } +bool PhysicalCore::IsInterrupted() const { + return interrupt_handler.IsInterrupted(); +} + void PhysicalCore::Interrupt() { guard->lock(); interrupt_handler.SetInterrupt(true); diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 85f6dec05..d7a7a951c 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -7,8 +7,6 @@ #include #include -#include "core/arm/cpu_interrupt_handler.h" - namespace Common { class SpinLock; } @@ -19,6 +17,7 @@ class Scheduler; namespace Core { class ARM_Interface; +class CPUInterruptHandler; class ExclusiveMonitor; class System; } // namespace Core @@ -45,9 +44,7 @@ public: void ClearInterrupt(); /// Check if this core is interrupted - bool IsInterrupted() const { - return interrupt_handler.IsInterrupted(); - } + bool IsInterrupted() const; // Shutdown this physical core. void Shutdown(); diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 61b8a396a..2b12c0dbf 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -658,7 +658,7 @@ void Scheduler::Reload() { cpu_core.LoadContext(thread->GetContext64()); cpu_core.SetTlsAddress(thread->GetTLSAddress()); cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); - cpu_core.ChangeProcessorId(this->core_id); + cpu_core.ChangeProcessorID(this->core_id); cpu_core.ClearExclusiveState(); } } @@ -691,7 +691,7 @@ void Scheduler::SwitchContextStep2() { cpu_core.LoadContext(new_thread->GetContext64()); cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); - cpu_core.ChangeProcessorId(this->core_id); + cpu_core.ChangeProcessorID(this->core_id); cpu_core.ClearExclusiveState(); } } diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 348107160..b3b4b5169 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -240,6 +240,10 @@ public: return switch_fiber; } + const std::shared_ptr& ControlContext() const { + return switch_fiber; + } + private: friend class GlobalScheduler; diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index 12536b6d8..4fd92428f 100644 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp @@ -68,7 +68,7 @@ static void ThreadStart1(u32 id, TestControl1& test_control) { * doing all the work required. */ TEST_CASE("Fibers::Setup", "[common]") { - constexpr u32 num_threads = 7; + constexpr std::size_t num_threads = 7; TestControl1 test_control{}; test_control.thread_fibers.resize(num_threads); test_control.work_fibers.resize(num_threads); -- cgit v1.2.3 From 765e37aa356d9398ee8d4b4eaac2fd35b065aa38 Mon Sep 17 00:00:00 2001 From: John Galt Date: Mon, 29 Jun 2020 06:49:22 -0400 Subject: cmake: fix fmt linking On gcc/ld, and clang/lld, fmt::v6 symbols are excluded, so linking fails. This fixes the issue. Note: This was included in the FindBoost changes I shared with BlinkHawk, however only they were merged. I'm not sure if it was missed, or if there was an issue with this part of the change. --- src/common/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index d120c8d3d..1808f7e3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -191,5 +191,5 @@ endif() create_target_directory_groups(common) find_package(Boost 1.71 COMPONENTS context headers REQUIRED) -target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile) +target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt microprofile) target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) -- cgit v1.2.3 From e6085ea35f0b2579a1df40ee4e24d02e47ee85bc Mon Sep 17 00:00:00 2001 From: Jan Beich Date: Mon, 29 Jun 2020 22:39:31 +0000 Subject: common: add sysconf() fallback src/common/memory_detect.cpp:15:10: fatal error: 'sys/sysinfo.h' file not found #include ^~~~~~~~~~~~~~~ --- src/common/memory_detect.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/common') diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp index 3fdc309a2..e981b9a2a 100644 --- a/src/common/memory_detect.cpp +++ b/src/common/memory_detect.cpp @@ -9,10 +9,12 @@ // clang-format on #else #include -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__FreeBSD__) #include -#else +#elif defined(__linux__) #include +#else +#include #endif #endif @@ -42,11 +44,22 @@ static MemoryInfo Detect() { sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0); mem_info.TotalPhysicalMemory = ramsize; mem_info.TotalSwapMemory = vmusage.xsu_total; -#else +#elif defined(__FreeBSD__) + u_long physmem, swap_total; + std::size_t sizeof_u_long = sizeof(u_long); + // sysctlbyname(const char *, void *, size_t *, const void *, size_t); + sysctlbyname("hw.physmem", &physmem, &sizeof_u_long, NULL, 0); + sysctlbyname("vm.swap_total", &swap_total, &sizeof_u_long, NULL, 0); + mem_info.TotalPhysicalMemory = physmem; + mem_info.TotalSwapMemory = swap_total; +#elif defined(__linux__) struct sysinfo meminfo; sysinfo(&meminfo); mem_info.TotalPhysicalMemory = meminfo.totalram; mem_info.TotalSwapMemory = meminfo.totalswap; +#else + mem_info.TotalPhysicalMemory = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); + mem_info.TotalSwapMemory = 0; #endif return mem_info; -- cgit v1.2.3 From 3b1683a152610b2da161ba3084e2dd5942127303 Mon Sep 17 00:00:00 2001 From: Jan Beich Date: Tue, 30 Jun 2020 22:55:47 +0000 Subject: common: switch to nullptr for sysctl's empty new value --- src/common/memory_detect.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/common') diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp index e981b9a2a..8cff6ec37 100644 --- a/src/common/memory_detect.cpp +++ b/src/common/memory_detect.cpp @@ -40,16 +40,16 @@ static MemoryInfo Detect() { // hw and vm are defined in sysctl.h // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471 // sysctlbyname(const char *, void *, size_t *, void *, size_t); - sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0); - sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0); + sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, nullptr, 0); + sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, nullptr, 0); mem_info.TotalPhysicalMemory = ramsize; mem_info.TotalSwapMemory = vmusage.xsu_total; #elif defined(__FreeBSD__) u_long physmem, swap_total; std::size_t sizeof_u_long = sizeof(u_long); // sysctlbyname(const char *, void *, size_t *, const void *, size_t); - sysctlbyname("hw.physmem", &physmem, &sizeof_u_long, NULL, 0); - sysctlbyname("vm.swap_total", &swap_total, &sizeof_u_long, NULL, 0); + sysctlbyname("hw.physmem", &physmem, &sizeof_u_long, nullptr, 0); + sysctlbyname("vm.swap_total", &swap_total, &sizeof_u_long, nullptr, 0); mem_info.TotalPhysicalMemory = physmem; mem_info.TotalSwapMemory = swap_total; #elif defined(__linux__) -- cgit v1.2.3 From 98fcd3ba5d24d65e91f89f7c9c4f3f60e29e080f Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 2 Jul 2020 22:53:12 -0400 Subject: Revert "cmake: fix fmt linking" --- src/common/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 1808f7e3f..d120c8d3d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -191,5 +191,5 @@ endif() create_target_directory_groups(common) find_package(Boost 1.71 COMPONENTS context headers REQUIRED) -target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt microprofile) +target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile) target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) -- cgit v1.2.3