From 234b5ff6a999d7d69cdcdf214e0c3984cdab11cf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 9 Feb 2020 16:53:22 -0400 Subject: Common: Implement WallClock Interface and implement a native clock for x64 --- src/common/x64/cpu_detect.cpp | 33 +++++++++++ src/common/x64/cpu_detect.h | 12 ++++ src/common/x64/native_clock.cpp | 128 ++++++++++++++++++++++++++++++++++++++++ src/common/x64/native_clock.h | 41 +++++++++++++ 4 files changed, 214 insertions(+) create mode 100644 src/common/x64/native_clock.cpp create mode 100644 src/common/x64/native_clock.h (limited to 'src/common/x64') diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..d767c544c 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -62,6 +62,17 @@ static CPUCaps Detect() { std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); + if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) + caps.manufacturer = Manufacturer::Intel; + else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) + caps.manufacturer = Manufacturer::AMD; + else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) + caps.manufacturer = Manufacturer::Hygon; + else + caps.manufacturer = Manufacturer::Unknown; + + u32 family = {}; + u32 model = {}; __cpuid(cpu_id, 0x80000000); @@ -73,6 +84,14 @@ static CPUCaps Detect() { // Detect family and other miscellaneous features if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); + family = (cpu_id[0] >> 8) & 0xf; + model = (cpu_id[0] >> 4) & 0xf; + if (family == 0xf) { + family += (cpu_id[0] >> 20) & 0xff; + } + if (family >= 6) { + model += ((cpu_id[0] >> 16) & 0xf) << 4; + } if ((cpu_id[3] >> 25) & 1) caps.sse = true; @@ -130,6 +149,20 @@ static CPUCaps Detect() { caps.fma4 = true; } + if (max_ex_fn >= 0x80000007) { + __cpuid(cpu_id, 0x80000007); + if (cpu_id[3] & (1 << 8)) { + caps.invariant_tsc = true; + } + } + + if (max_std_fn >= 0x16) { + __cpuid(cpu_id, 0x16); + caps.base_frequency = cpu_id[0]; + caps.max_frequency = cpu_id[1]; + caps.bus_frequency = cpu_id[2]; + } + return caps; } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..f0676fa5e 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -6,8 +6,16 @@ namespace Common { +enum class Manufacturer : u32 { + Intel = 0, + AMD = 1, + Hygon = 2, + Unknown = 3, +}; + /// x86/x64 CPU capabilities that may be detected by this module struct CPUCaps { + Manufacturer manufacturer; char cpu_string[0x21]; char brand_string[0x41]; bool sse; @@ -24,6 +32,10 @@ struct CPUCaps { bool fma; bool fma4; bool aes; + bool invariant_tsc; + u32 base_frequency; + u32 max_frequency; + u32 bus_frequency; }; /** diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp new file mode 100644 index 000000000..c799111fd --- /dev/null +++ b/src/common/x64/native_clock.cpp @@ -0,0 +1,128 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "common/x64/native_clock.h" + +namespace Common { + +#ifdef _MSC_VER + +namespace { + +struct uint128 { + u64 low; + u64 high; +}; + +u64 umuldiv64(u64 a, u64 b, u64 d) { + uint128 r{}; + r.low = _umul128(a, b, &r.high); + u64 remainder; + return _udiv128(r.high, r.low, d, &remainder); +} + +} // namespace + +#else + +namespace { + +u64 umuldiv64(u64 a, u64 b, u64 d) { + const u64 diva = a / d; + const u64 moda = a % d; + const u64 divb = b / d; + const u64 modb = b % d; + return diva * b + moda * divb + moda * modb / d; +} + +} // namespace + +#endif + +u64 EstimateRDTSCFrequency() { + const auto milli_10 = std::chrono::milliseconds{10}; + // get current time + _mm_mfence(); + const u64 tscStart = __rdtsc(); + const auto startTime = std::chrono::high_resolution_clock::now(); + // wait roughly 3 seconds + while (true) { + auto milli = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - startTime); + if (milli.count() >= 3000) + break; + std::this_thread::sleep_for(milli_10); + } + const auto endTime = std::chrono::high_resolution_clock::now(); + _mm_mfence(); + const u64 tscEnd = __rdtsc(); + // calculate difference + const u64 timer_diff = + std::chrono::duration_cast(endTime - startTime).count(); + const u64 tsc_diff = tscEnd - tscStart; + const u64 tsc_freq = umuldiv64(tsc_diff, 1000000000ULL, timer_diff); + return tsc_freq; +} + +namespace X64 { +NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, + u64 rtsc_frequency) + : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{ + rtsc_frequency} { + _mm_mfence(); + last_measure = __rdtsc(); + accumulated_ticks = 0U; +} + +u64 NativeClock::GetRTSC() { + rtsc_serialize.lock(); + _mm_mfence(); + const u64 current_measure = __rdtsc(); + u64 diff = current_measure - last_measure; + diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) + if (current_measure > last_measure) { + last_measure = current_measure; + } + accumulated_ticks += diff; + rtsc_serialize.unlock(); + return accumulated_ticks; +} + +std::chrono::nanoseconds NativeClock::GetTimeNS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)}; +} + +std::chrono::microseconds NativeClock::GetTimeUS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)}; +} + +std::chrono::milliseconds NativeClock::GetTimeMS() { + const u64 rtsc_value = GetRTSC(); + return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)}; +} + +u64 NativeClock::GetClockCycles() { + const u64 rtsc_value = GetRTSC(); + return umuldiv64(rtsc_value, emulated_clock_frequency, rtsc_frequency); +} + +u64 NativeClock::GetCPUCycles() { + const u64 rtsc_value = GetRTSC(); + return umuldiv64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); +} + +} // namespace X64 + +} // namespace Common diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h new file mode 100644 index 000000000..b58cf9f5a --- /dev/null +++ b/src/common/x64/native_clock.h @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/spin_lock.h" +#include "common/wall_clock.h" + +namespace Common { + +namespace X64 { +class NativeClock : public WallClock { +public: + NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency); + + std::chrono::nanoseconds GetTimeNS() override; + + std::chrono::microseconds GetTimeUS() override; + + std::chrono::milliseconds GetTimeMS() override; + + u64 GetClockCycles() override; + + u64 GetCPUCycles() override; + +private: + u64 GetRTSC(); + + SpinLock rtsc_serialize{}; + u64 last_measure{}; + u64 accumulated_ticks{}; + u64 rtsc_frequency; +}; +} // namespace X64 + +u64 EstimateRDTSCFrequency(); + +} // namespace Common -- cgit v1.2.3 From e3524d114246a9221c766bdf1992777b208cbd67 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Feb 2020 11:20:40 -0400 Subject: Common: Refactor & Document Wall clock. --- src/common/uint128.cpp | 22 +++++++++++++++++++ src/common/uint128.h | 3 +++ src/common/wall_clock.cpp | 13 +++++------- src/common/wall_clock.h | 13 +++++++++++- src/common/x64/native_clock.cpp | 47 ++++++----------------------------------- src/core/host_timing.cpp | 3 +-- 6 files changed, 50 insertions(+), 51 deletions(-) (limited to 'src/common/x64') diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp index 32bf56730..7e77588db 100644 --- a/src/common/uint128.cpp +++ b/src/common/uint128.cpp @@ -6,12 +6,34 @@ #include #pragma intrinsic(_umul128) +#pragma intrinsic(_udiv128) #endif #include #include "common/uint128.h" namespace Common { +#ifdef _MSC_VER + +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { + u128 r{}; + r[0] = _umul128(a, b, &r[1]); + u64 remainder; + return _udiv128(r[1], r[0], d, &remainder); +} + +#else + +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { + const u64 diva = a / d; + const u64 moda = a % d; + const u64 divb = b / d; + const u64 modb = b % d; + return diva * b + moda * divb + moda * modb / d; +} + +#endif + u128 Multiply64Into128(u64 a, u64 b) { u128 result; #ifdef _MSC_VER diff --git a/src/common/uint128.h b/src/common/uint128.h index a3be2a2cb..503cd2d0c 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -9,6 +9,9 @@ namespace Common { +// This function multiplies 2 u64 values and divides it by a u64 value. +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d); + // This function multiplies 2 u64 values and produces a u128 value; u128 Multiply64Into128(u64 a, u64 b); diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index eabbba9da..8f5e17fa4 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -58,7 +58,7 @@ private: #ifdef ARCHITECTURE_x86_64 -WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); u64 rtsc_frequency = 0; if (caps.invariant_tsc) { @@ -70,19 +70,16 @@ WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_cloc } } if (rtsc_frequency == 0) { - return static_cast( - new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency)); + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); } else { - return static_cast( - new X64::NativeClock(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency)); + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency); } } #else -WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { - return static_cast( - new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency)); +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) { + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); } #endif diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 6f763d74b..fc34429bb 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "common/common_types.h" @@ -12,10 +13,20 @@ namespace Common { class WallClock { public: + + /// Returns current wall time in nanoseconds virtual std::chrono::nanoseconds GetTimeNS() = 0; + + /// Returns current wall time in microseconds virtual std::chrono::microseconds GetTimeUS() = 0; + + /// Returns current wall time in milliseconds virtual std::chrono::milliseconds GetTimeMS() = 0; + + /// Returns current wall time in emulated clock cycles virtual u64 GetClockCycles() = 0; + + /// Returns current wall time in emulated cpu cycles virtual u64 GetCPUCycles() = 0; /// Tells if the wall clock, uses the host CPU's hardware clock @@ -35,6 +46,6 @@ private: bool is_native; }; -WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency); +std::unique_ptr CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency); } // namespace Common diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index c799111fd..26d4d0ba6 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -11,44 +11,11 @@ #include #endif +#include "common/uint128.h" #include "common/x64/native_clock.h" namespace Common { -#ifdef _MSC_VER - -namespace { - -struct uint128 { - u64 low; - u64 high; -}; - -u64 umuldiv64(u64 a, u64 b, u64 d) { - uint128 r{}; - r.low = _umul128(a, b, &r.high); - u64 remainder; - return _udiv128(r.high, r.low, d, &remainder); -} - -} // namespace - -#else - -namespace { - -u64 umuldiv64(u64 a, u64 b, u64 d) { - const u64 diva = a / d; - const u64 moda = a % d; - const u64 divb = b / d; - const u64 modb = b % d; - return diva * b + moda * divb + moda * modb / d; -} - -} // namespace - -#endif - u64 EstimateRDTSCFrequency() { const auto milli_10 = std::chrono::milliseconds{10}; // get current time @@ -70,7 +37,7 @@ u64 EstimateRDTSCFrequency() { const u64 timer_diff = std::chrono::duration_cast(endTime - startTime).count(); const u64 tsc_diff = tscEnd - tscStart; - const u64 tsc_freq = umuldiv64(tsc_diff, 1000000000ULL, timer_diff); + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); return tsc_freq; } @@ -100,27 +67,27 @@ u64 NativeClock::GetRTSC() { std::chrono::nanoseconds NativeClock::GetTimeNS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)}; + return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; } std::chrono::microseconds NativeClock::GetTimeUS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)}; + return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; } std::chrono::milliseconds NativeClock::GetTimeMS() { const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)}; + return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; } u64 NativeClock::GetClockCycles() { const u64 rtsc_value = GetRTSC(); - return umuldiv64(rtsc_value, emulated_clock_frequency, rtsc_frequency); + return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); } u64 NativeClock::GetCPUCycles() { const u64 rtsc_value = GetRTSC(); - return umuldiv64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); + return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); } } // namespace X64 diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp index ef9977b76..4ccf7c6c1 100644 --- a/src/core/host_timing.cpp +++ b/src/core/host_timing.cpp @@ -36,8 +36,7 @@ struct CoreTiming::Event { }; CoreTiming::CoreTiming() { - Common::WallClock* wall = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ); - clock = std::unique_ptr(wall); + clock = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ); } CoreTiming::~CoreTiming() = default; -- cgit v1.2.3 From d6474b4aca7f054d00df350c716709475ef0f49b Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 16 May 2020 07:24:57 -0400 Subject: common/cpu_detect: Add AVX512 detection --- src/common/x64/cpu_detect.cpp | 5 +++++ src/common/x64/cpu_detect.h | 1 + 2 files changed, 6 insertions(+) (limited to 'src/common/x64') diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..f35dcb498 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -110,6 +110,11 @@ static CPUCaps Detect() { caps.bmi1 = true; if ((cpu_id[1] >> 8) & 1) caps.bmi2 = true; + // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) + if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && + (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { + caps.avx512 = caps.avx2; + } } } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..7606c3f7b 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -19,6 +19,7 @@ struct CPUCaps { bool lzcnt; bool avx; bool avx2; + bool avx512; bool bmi1; bool bmi2; bool fma; -- cgit v1.2.3 From 18dcb0934217628711c5b1d22fd6d7635e683e3f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Feb 2020 12:28:55 -0400 Subject: HostTiming: Pause the hardware clock on pause. --- src/common/wall_clock.cpp | 4 ++++ src/common/wall_clock.h | 2 ++ src/common/x64/native_clock.cpp | 7 +++++++ src/common/x64/native_clock.h | 2 ++ src/core/core.cpp | 2 +- src/core/core_timing.cpp | 6 ++++++ src/core/core_timing.h | 1 + 7 files changed, 23 insertions(+), 1 deletion(-) (limited to 'src/common/x64') diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index d4d35f4e7..a46db6bbf 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -53,6 +53,10 @@ public: return Common::Divide128On32(temporary, 1000000000).first; } + void Pause(bool is_paused) override { + // Do nothing in this clock type. + } + private: base_time_point start_time; }; diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index ed284cf50..367d72134 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -28,6 +28,8 @@ public: /// Returns current wall time in emulated cpu cycles virtual u64 GetCPUCycles() = 0; + virtual void Pause(bool is_paused) = 0; + /// Tells if the wall clock, uses the host CPU's hardware clock bool IsNative() const { return is_native; diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 26d4d0ba6..926f92ff8 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -65,6 +65,13 @@ u64 NativeClock::GetRTSC() { return accumulated_ticks; } +void NativeClock::Pause(bool is_paused) { + if (!is_paused) { + _mm_mfence(); + last_measure = __rdtsc(); + } +} + std::chrono::nanoseconds NativeClock::GetTimeNS() { const u64 rtsc_value = GetRTSC(); return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index b58cf9f5a..3851f8fc2 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -26,6 +26,8 @@ public: u64 GetCPUCycles() override; + void Pause(bool is_paused) override; + private: u64 GetRTSC(); diff --git a/src/core/core.cpp b/src/core/core.cpp index e8936b09d..1d6179a80 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -137,8 +137,8 @@ struct System::Impl { ResultStatus Pause() { status = ResultStatus::Success; - kernel.Suspend(true); core_timing.SyncPause(true); + kernel.Suspend(true); cpu_manager.Pause(true); return status; diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index cc32a853b..5a7abcfca 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -77,6 +77,9 @@ void CoreTiming::SyncPause(bool is_paused) { return; } Pause(is_paused); + if (!is_paused) { + pause_event.Set(); + } event.Set(); while (paused_set != is_paused) ; @@ -197,6 +200,9 @@ void CoreTiming::ThreadLoop() { wait_set = false; } paused_set = true; + clock->Pause(true); + pause_event.Wait(); + clock->Pause(false); } } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 707c8ef0c..c70b605c8 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -136,6 +136,7 @@ private: std::shared_ptr ev_lost; Common::Event event{}; + Common::Event pause_event{}; Common::SpinLock basic_lock{}; Common::SpinLock advance_lock{}; std::unique_ptr timer_thread; -- cgit v1.2.3 From 534466754f381e90f5f6475a0c02031242a5c256 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 21 Mar 2020 12:23:13 -0400 Subject: X64 Clock: Reduce accuracy to be less or equal to guest accuracy. --- src/common/x64/native_clock.cpp | 3 ++- src/common/x64/native_clock.h | 5 +++++ src/core/arm/dynarmic/arm_dynarmic_64.cpp | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src/common/x64') diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 926f92ff8..f1bc60fd2 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -62,7 +62,8 @@ u64 NativeClock::GetRTSC() { } accumulated_ticks += diff; rtsc_serialize.unlock(); - return accumulated_ticks; + /// The clock cannot be more precise than the guest timer, remove the lower bits + return accumulated_ticks & inaccuracy_mask; } void NativeClock::Pause(bool is_paused) { diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 3851f8fc2..e853094d2 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -31,6 +31,11 @@ public: private: u64 GetRTSC(); + /// value used to reduce the native clocks accuracy as some apss rely on + /// undefined behavior where the level of accuracy in the clock shouldn't + /// be higher. + static constexpr u64 inaccuracy_mask = ~(0x100 - 1); + SpinLock rtsc_serialize{}; u64 last_measure{}; u64 accumulated_ticks{}; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 547a6e07e..3f18dede2 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -184,6 +184,9 @@ std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable& config.enable_fast_dispatch = false; } + // CNTPCT uses wall clock. + config.wall_clock_cntpct = true; + return std::make_shared(config); } -- cgit v1.2.3 From 31651523968312433ebd267a74e86689107a7023 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 18 May 2020 13:08:53 -0400 Subject: Common/NativeClockx86: Reduce native clock accuracy further. --- src/common/x64/native_clock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/common/x64') diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index e853094d2..891a3bbfd 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -34,7 +34,7 @@ private: /// value used to reduce the native clocks accuracy as some apss rely on /// undefined behavior where the level of accuracy in the clock shouldn't /// be higher. - static constexpr u64 inaccuracy_mask = ~(0x100 - 1); + static constexpr u64 inaccuracy_mask = ~(0x400 - 1); SpinLock rtsc_serialize{}; u64 last_measure{}; -- cgit v1.2.3 From 2f8947583f2f0af4058600243d6c1d244e3c4890 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 27 Jun 2020 18:20:06 -0400 Subject: Core/Common: Address Feedback. --- src/common/fiber.cpp | 10 +++------- src/common/spin_lock.cpp | 6 +++--- src/common/spin_lock.h | 5 +++++ src/common/x64/native_clock.cpp | 4 ++-- src/core/arm/arm_interface.h | 2 +- src/core/arm/cpu_interrupt_handler.h | 2 +- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 5 +++-- src/core/arm/dynarmic/arm_dynarmic_32.h | 2 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 5 +++-- src/core/arm/dynarmic/arm_dynarmic_64.h | 2 +- src/core/arm/unicorn/arm_unicorn.cpp | 2 +- src/core/arm/unicorn/arm_unicorn.h | 2 +- src/core/core.cpp | 2 +- src/core/core.h | 4 ++-- src/core/core_timing.cpp | 28 ++++++++++++---------------- src/core/hle/kernel/kernel.cpp | 14 +++++--------- src/core/hle/kernel/physical_core.cpp | 4 ++++ src/core/hle/kernel/physical_core.h | 7 ++----- src/core/hle/kernel/scheduler.cpp | 4 ++-- src/core/hle/kernel/scheduler.h | 4 ++++ src/tests/common/fibers.cpp | 2 +- 21 files changed, 58 insertions(+), 58 deletions(-) (limited to 'src/common/x64') diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index f97ad433b..1c1d09ccb 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -54,9 +54,7 @@ Fiber::Fiber(std::function&& entry_point_func, void* start_paramete impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this); } -Fiber::Fiber() { - impl = std::make_unique(); -} +Fiber::Fiber() : impl{std::make_unique()} {} Fiber::~Fiber() { if (released) { @@ -116,8 +114,8 @@ std::shared_ptr Fiber::ThreadToFiber() { struct Fiber::FiberImpl { alignas(64) std::array stack; - u8* stack_limit; alignas(64) std::array rewind_stack; + u8* stack_limit; u8* rewind_stack_limit; boost::context::detail::fcontext_t context; boost::context::detail::fcontext_t rewind_context; @@ -168,9 +166,7 @@ void Fiber::SetRewindPoint(std::function&& rewind_func, void* start rewind_parameter = start_parameter; } -Fiber::Fiber() { - impl = std::make_unique(); -} +Fiber::Fiber() : impl{std::make_unique()} {} Fiber::~Fiber() { if (released) { diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp index c7b46aac6..c1524220f 100644 --- a/src/common/spin_lock.cpp +++ b/src/common/spin_lock.cpp @@ -20,7 +20,7 @@ namespace { -void thread_pause() { +void ThreadPause() { #if __x86_64__ _mm_pause(); #elif __aarch64__ && _MSC_VER @@ -30,13 +30,13 @@ void thread_pause() { #endif } -} // namespace +} // Anonymous namespace namespace Common { void SpinLock::lock() { while (lck.test_and_set(std::memory_order_acquire)) { - thread_pause(); + ThreadPause(); } } diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h index 70282a961..1df5528c4 100644 --- a/src/common/spin_lock.h +++ b/src/common/spin_lock.h @@ -8,6 +8,11 @@ namespace Common { +/** + * SpinLock class + * a lock similar to mutex that forces a thread to spin wait instead calling the + * supervisor. Should be used on short sequences of code. + */ class SpinLock { public: void lock(); diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index f1bc60fd2..424b39b1f 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #ifdef _MSC_VER @@ -52,7 +53,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequenc } u64 NativeClock::GetRTSC() { - rtsc_serialize.lock(); + std::scoped_lock scope{rtsc_serialize}; _mm_mfence(); const u64 current_measure = __rdtsc(); u64 diff = current_measure - last_measure; @@ -61,7 +62,6 @@ u64 NativeClock::GetRTSC() { last_measure = current_measure; } accumulated_ticks += diff; - rtsc_serialize.unlock(); /// The clock cannot be more precise than the guest timer, remove the lower bits return accumulated_ticks & inaccuracy_mask; } diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 0c1d6ac39..1f24051e4 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -148,7 +148,7 @@ public: */ virtual void SetTPIDR_EL0(u64 value) = 0; - virtual void ChangeProcessorId(std::size_t new_core_id) = 0; + virtual void ChangeProcessorID(std::size_t new_core_id) = 0; virtual void SaveContext(ThreadContext32& ctx) = 0; virtual void SaveContext(ThreadContext64& ctx) = 0; diff --git a/src/core/arm/cpu_interrupt_handler.h b/src/core/arm/cpu_interrupt_handler.h index 91c31a271..3d062d326 100644 --- a/src/core/arm/cpu_interrupt_handler.h +++ b/src/core/arm/cpu_interrupt_handler.h @@ -23,7 +23,7 @@ public: CPUInterruptHandler(CPUInterruptHandler&&) = default; CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default; - constexpr bool IsInterrupted() const { + bool IsInterrupted() const { return is_interrupted; } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index cfda12098..0d4ab95b7 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -107,7 +107,7 @@ public: u64 GetTicksRemaining() override { if (parent.uses_wall_clock) { if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { - return 1000U; + return minimum_run_cycles; } return 0U; } @@ -116,6 +116,7 @@ public: ARM_Dynarmic_32& parent; std::size_t num_interpreted_instructions{}; + static constexpr u64 minimum_run_cycles = 1000U; }; std::shared_ptr ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, @@ -214,7 +215,7 @@ void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { cp15->uprw = static_cast(value); } -void ARM_Dynarmic_32::ChangeProcessorId(std::size_t new_core_id) { +void ARM_Dynarmic_32::ChangeProcessorID(std::size_t new_core_id) { jit->ChangeProcessorID(new_core_id); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index d9c0bfede..2bab31b92 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -47,7 +47,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void ChangeProcessorId(std::size_t new_core_id) override; + void ChangeProcessorID(std::size_t new_core_id) override; void SaveContext(ThreadContext32& ctx) override; void SaveContext(ThreadContext64& ctx) override {} diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 35a99e28a..790981034 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -144,7 +144,7 @@ public: u64 GetTicksRemaining() override { if (parent.uses_wall_clock) { if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { - return 1000U; + return minimum_run_cycles; } return 0U; } @@ -159,6 +159,7 @@ public: std::size_t num_interpreted_instructions = 0; u64 tpidrro_el0 = 0; u64 tpidr_el0 = 0; + static constexpr u64 minimum_run_cycles = 1000U; }; std::shared_ptr ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, @@ -271,7 +272,7 @@ void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) { cb->tpidr_el0 = value; } -void ARM_Dynarmic_64::ChangeProcessorId(std::size_t new_core_id) { +void ARM_Dynarmic_64::ChangeProcessorID(std::size_t new_core_id) { jit->ChangeProcessorID(new_core_id); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index c74fcbcea..403c55961 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -45,7 +45,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void ChangeProcessorId(std::size_t new_core_id) override; + void ChangeProcessorID(std::size_t new_core_id) override; void SaveContext(ThreadContext32& ctx) override {} void SaveContext(ThreadContext64& ctx) override; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 35e8f42e8..1df3f3ed1 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -159,7 +159,7 @@ void ARM_Unicorn::SetTPIDR_EL0(u64 value) { CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDR_EL0, &value)); } -void ARM_Unicorn::ChangeProcessorId(std::size_t new_core_id) { +void ARM_Unicorn::ChangeProcessorID(std::size_t new_core_id) { core_index = new_core_id; } diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 8ace8b86f..810aff311 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -36,7 +36,7 @@ public: void SetTlsAddress(VAddr address) override; void SetTPIDR_EL0(u64 value) override; u64 GetTPIDR_EL0() const override; - void ChangeProcessorId(std::size_t new_core_id) override; + void ChangeProcessorID(std::size_t new_core_id) override; void PrepareReschedule() override; void ClearExclusiveState() override; void ExecuteInstructions(std::size_t num_instructions); diff --git a/src/core/core.cpp b/src/core/core.cpp index 8256ec0fc..1a243c515 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -443,7 +443,7 @@ bool System::IsPoweredOn() const { } void System::PrepareReschedule() { - // impl->CurrentPhysicalCore().Stop(); + // Deprecated, does nothing, kept for backward compatibility. } void System::PrepareReschedule(const u32 core_index) { diff --git a/src/core/core.h b/src/core/core.h index 133ecb8e1..5c6cfbffe 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -138,13 +138,13 @@ public: /** * Run the OS and Application - * This function will start emulation and run the competent devices + * This function will start emulation and run the relevant devices */ ResultStatus Run(); /** * Pause the OS and Application - * This function will pause emulation and stop the competent devices + * This function will pause emulation and stop the relevant devices */ ResultStatus Pause(); diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 1aa89a1cc..5c83c41a4 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -45,9 +45,9 @@ CoreTiming::CoreTiming() { CoreTiming::~CoreTiming() = default; void CoreTiming::ThreadEntry(CoreTiming& instance) { - std::string name = "yuzu:HostTiming"; - MicroProfileOnThreadCreate(name.c_str()); - Common::SetCurrentThreadName(name.c_str()); + constexpr char name[] = "yuzu:HostTiming"; + MicroProfileOnThreadCreate(name); + Common::SetCurrentThreadName(name); Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); instance.on_thread_init(); instance.ThreadLoop(); @@ -108,18 +108,19 @@ bool CoreTiming::HasPendingEvents() const { void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr& event_type, u64 userdata) { - basic_lock.lock(); - const u64 timeout = static_cast(GetGlobalTimeNs().count() + ns_into_future); + { + std::scoped_lock scope{basic_lock}; + const u64 timeout = static_cast(GetGlobalTimeNs().count() + ns_into_future); - event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); + event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); - std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); - basic_lock.unlock(); + std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + } event.Set(); } void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u64 userdata) { - basic_lock.lock(); + std::scoped_lock scope{basic_lock}; const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get() && e.userdata == userdata; }); @@ -129,7 +130,6 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, u event_queue.erase(itr, event_queue.end()); std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); } - basic_lock.unlock(); } void CoreTiming::AddTicks(u64 ticks) { @@ -187,8 +187,8 @@ void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { } std::optional CoreTiming::Advance() { - advance_lock.lock(); - basic_lock.lock(); + std::scoped_lock advance_scope{advance_lock}; + std::scoped_lock basic_scope{basic_lock}; global_timer = GetGlobalTimeNs().count(); while (!event_queue.empty() && event_queue.front().time <= global_timer) { @@ -207,12 +207,8 @@ std::optional CoreTiming::Advance() { if (!event_queue.empty()) { const s64 next_time = event_queue.front().time - global_timer; - basic_lock.unlock(); - advance_lock.unlock(); return next_time; } else { - basic_lock.unlock(); - advance_lock.unlock(); return std::nullopt; } } diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index dbb75416d..1f2af7a1b 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -472,16 +472,12 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const { } void KernelCore::InvalidateAllInstructionCaches() { - if (!IsMulticore()) { - auto& threads = GlobalScheduler().GetThreadList(); - for (auto& thread : threads) { - if (!thread->IsHLEThread()) { - auto& arm_interface = thread->ArmInterface(); - arm_interface.ClearInstructionCache(); - } + auto& threads = GlobalScheduler().GetThreadList(); + for (auto& thread : threads) { + if (!thread->IsHLEThread()) { + auto& arm_interface = thread->ArmInterface(); + arm_interface.ClearInstructionCache(); } - } else { - UNIMPLEMENTED_MSG("Cache Invalidation unimplemented for multicore"); } } diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index c82c60a16..c6bbdb080 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -37,6 +37,10 @@ void PhysicalCore::Shutdown() { scheduler.Shutdown(); } +bool PhysicalCore::IsInterrupted() const { + return interrupt_handler.IsInterrupted(); +} + void PhysicalCore::Interrupt() { guard->lock(); interrupt_handler.SetInterrupt(true); diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 85f6dec05..d7a7a951c 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -7,8 +7,6 @@ #include #include -#include "core/arm/cpu_interrupt_handler.h" - namespace Common { class SpinLock; } @@ -19,6 +17,7 @@ class Scheduler; namespace Core { class ARM_Interface; +class CPUInterruptHandler; class ExclusiveMonitor; class System; } // namespace Core @@ -45,9 +44,7 @@ public: void ClearInterrupt(); /// Check if this core is interrupted - bool IsInterrupted() const { - return interrupt_handler.IsInterrupted(); - } + bool IsInterrupted() const; // Shutdown this physical core. void Shutdown(); diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 61b8a396a..2b12c0dbf 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -658,7 +658,7 @@ void Scheduler::Reload() { cpu_core.LoadContext(thread->GetContext64()); cpu_core.SetTlsAddress(thread->GetTLSAddress()); cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); - cpu_core.ChangeProcessorId(this->core_id); + cpu_core.ChangeProcessorID(this->core_id); cpu_core.ClearExclusiveState(); } } @@ -691,7 +691,7 @@ void Scheduler::SwitchContextStep2() { cpu_core.LoadContext(new_thread->GetContext64()); cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); - cpu_core.ChangeProcessorId(this->core_id); + cpu_core.ChangeProcessorID(this->core_id); cpu_core.ClearExclusiveState(); } } diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 348107160..b3b4b5169 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -240,6 +240,10 @@ public: return switch_fiber; } + const std::shared_ptr& ControlContext() const { + return switch_fiber; + } + private: friend class GlobalScheduler; diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp index 12536b6d8..4fd92428f 100644 --- a/src/tests/common/fibers.cpp +++ b/src/tests/common/fibers.cpp @@ -68,7 +68,7 @@ static void ThreadStart1(u32 id, TestControl1& test_control) { * doing all the work required. */ TEST_CASE("Fibers::Setup", "[common]") { - constexpr u32 num_threads = 7; + constexpr std::size_t num_threads = 7; TestControl1 test_control{}; test_control.thread_fibers.resize(num_threads); test_control.work_fibers.resize(num_threads); -- cgit v1.2.3