From dd12dd4c67dd4bc6e7a7d071b925afc38e687f8b Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 22 Apr 2023 22:55:03 -0400 Subject: x64: Deduplicate RDTSC usage --- src/common/x64/cpu_detect.cpp | 3 +++ src/common/x64/cpu_wait.cpp | 20 +------------------- src/common/x64/rdtsc.cpp | 39 +++++++++++++++++++++++++++++++++++++++ src/common/x64/rdtsc.h | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 19 deletions(-) create mode 100644 src/common/x64/rdtsc.cpp create mode 100644 src/common/x64/rdtsc.h (limited to 'src/common/x64') diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..c998b1197 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/x64/cpu_detect.h" +#include "common/x64/rdtsc.h" #ifdef _WIN32 #include @@ -187,6 +188,8 @@ static CPUCaps Detect() { caps.tsc_frequency = static_cast(caps.crystal_frequency) * caps.tsc_crystal_ratio_numerator / caps.tsc_crystal_ratio_denominator; + } else { + caps.tsc_frequency = X64::EstimateRDTSCFrequency(); } } diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..c53dd4945 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -9,19 +9,11 @@ #include "common/x64/cpu_detect.h" #include "common/x64/cpu_wait.h" +#include "common/x64/rdtsc.h" namespace Common::X64 { #ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} - __forceinline static void TPAUSE() { // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. // For reference: @@ -32,16 +24,6 @@ __forceinline static void TPAUSE() { _tpause(0, FencedRDTSC() + PauseCycles); } #else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} - static void TPAUSE() { // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. // For reference: diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "common/steady_clock.h" +#include "common/uint128.h" +#include "common/x64/rdtsc.h" + +namespace Common::X64 { + +template +static u64 RoundToNearest(u64 value) { + const auto mod = value % Nearest; + return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + +u64 EstimateRDTSCFrequency() { + // Discard the first result measuring the rdtsc. + FencedRDTSC(); + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + FencedRDTSC(); + + // Get the current time. + const auto start_time = RealTimeClock::Now(); + const u64 tsc_start = FencedRDTSC(); + // Wait for 100 milliseconds. + std::this_thread::sleep_for(std::chrono::milliseconds{100}); + const auto end_time = RealTimeClock::Now(); + const u64 tsc_end = FencedRDTSC(); + // Calculate differences. + const u64 timer_diff = static_cast( + std::chrono::duration_cast(end_time - start_time).count()); + const u64 tsc_diff = tsc_end - tsc_start; + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); + return RoundToNearest<100'000>(tsc_freq); +} + +} // namespace Common::X64 diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#ifdef _MSC_VER +#include +#endif + +#include "common/common_types.h" + +namespace Common::X64 { + +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} +#else +static inline u64 FencedRDTSC() { + u64 eax; + u64 edx; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; +} +#endif + +u64 EstimateRDTSCFrequency(); + +} // namespace Common::X64 -- cgit v1.2.3 From 1492a65454d6a03f641b136cc61e68870be00218 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 22 Apr 2023 23:08:28 -0400 Subject: (wall, native)_clock: Rework NativeClock --- src/common/steady_clock.cpp | 5 +- src/common/wall_clock.cpp | 73 +++++++----------- src/common/wall_clock.h | 54 ++++++------- src/common/x64/native_clock.cpp | 165 ++++++---------------------------------- src/common/x64/native_clock.h | 56 +++++--------- 5 files changed, 94 insertions(+), 259 deletions(-) (limited to 'src/common/x64') diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp index 782859196..9415eed29 100644 --- a/src/common/steady_clock.cpp +++ b/src/common/steady_clock.cpp @@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() { // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. static constexpr s64 Multiplier = 100; // Convert Windows epoch to Unix epoch. - static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; + static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; FILETIME filetime; GetSystemTimePreciseAsFileTime(&filetime); return Multiplier * ((static_cast(filetime.dwHighDateTime) << 32) + - static_cast(filetime.dwLowDateTime)) - - WindowsEpochToUnixEpochNS; + static_cast(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); } #endif diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 817e71d52..ad8db06b0 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -2,88 +2,71 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/steady_clock.h" -#include "common/uint128.h" #include "common/wall_clock.h" #ifdef ARCHITECTURE_x86_64 #include "common/x64/cpu_detect.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" #endif namespace Common { class StandardWallClock final : public WallClock { public: - explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, - start_time{SteadyClock::Now()} {} + explicit StandardWallClock() : start_time{SteadyClock::Now()} {} - std::chrono::nanoseconds GetTimeNS() override { + std::chrono::nanoseconds GetTimeNS() const override { return SteadyClock::Now() - start_time; } - std::chrono::microseconds GetTimeUS() override { - return std::chrono::duration_cast(GetTimeNS()); + std::chrono::microseconds GetTimeUS() const override { + return static_cast(GetHostTicksElapsed() / NsToUsRatio::den); } - std::chrono::milliseconds GetTimeMS() override { - return std::chrono::duration_cast(GetTimeNS()); + std::chrono::milliseconds GetTimeMS() const override { + return static_cast(GetHostTicksElapsed() / NsToMsRatio::den); } - u64 GetClockCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetCNTPCT() const override { + return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } - u64 GetCPUCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetHostTicksNow() const override { + return static_cast(SteadyClock::Now().time_since_epoch().count()); } - void Pause([[maybe_unused]] bool is_paused) override { - // Do nothing in this clock type. + u64 GetHostTicksElapsed() const override { + return static_cast(GetTimeNS().count()); + } + + bool IsNative() const override { + return false; } private: SteadyClock::time_point start_time; }; +std::unique_ptr CreateOptimalClock() { #ifdef ARCHITECTURE_x86_64 - -std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); - u64 rtsc_frequency = 0; - if (caps.invariant_tsc) { - rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency(); - } - // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: - // - A nanosecond - // - The emulated CPU frequency - // - The emulated clock counter frequency (CNTFRQ) - if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || - rtsc_frequency <= emulated_clock_frequency) { - return std::make_unique(emulated_cpu_frequency, - emulated_clock_frequency); + if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::CNTFRQ) { + return std::make_unique(caps.tsc_frequency); } else { - return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency, - rtsc_frequency); + // Fallback to StandardWallClock if the hardware TSC + // - Is not invariant + // - Is not more precise than CNTFRQ + return std::make_unique(); } -} - #else - -std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); -} - + return std::make_unique(); #endif +} -std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); +std::unique_ptr CreateStandardWallClock() { + return std::make_unique(); } } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 157ec5eae..a73e6e644 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -5,6 +5,7 @@ #include #include +#include #include "common/common_types.h" @@ -12,50 +13,43 @@ namespace Common { class WallClock { public: - static constexpr u64 NS_RATIO = 1'000'000'000; - static constexpr u64 US_RATIO = 1'000'000; - static constexpr u64 MS_RATIO = 1'000; + static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz virtual ~WallClock() = default; - /// Returns current wall time in nanoseconds - [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; + /// @returns The time in nanoseconds since the construction of this clock. + virtual std::chrono::nanoseconds GetTimeNS() const = 0; - /// Returns current wall time in microseconds - [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; + /// @returns The time in microseconds since the construction of this clock. + virtual std::chrono::microseconds GetTimeUS() const = 0; - /// Returns current wall time in milliseconds - [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; + /// @returns The time in milliseconds since the construction of this clock. + virtual std::chrono::milliseconds GetTimeMS() const = 0; - /// Returns current wall time in emulated clock cycles - [[nodiscard]] virtual u64 GetClockCycles() = 0; + /// @returns The guest CNTPCT ticks since the construction of this clock. + virtual u64 GetCNTPCT() const = 0; - /// Returns current wall time in emulated cpu cycles - [[nodiscard]] virtual u64 GetCPUCycles() = 0; + /// @returns The raw host timer ticks since an indeterminate epoch. + virtual u64 GetHostTicksNow() const = 0; - virtual void Pause(bool is_paused) = 0; + /// @returns The raw host timer ticks since the construction of this clock. + virtual u64 GetHostTicksElapsed() const = 0; - /// Tells if the wall clock, uses the host CPU's hardware clock - [[nodiscard]] bool IsNative() const { - return is_native; - } + /// @returns Whether the clock directly uses the host's hardware clock. + virtual bool IsNative() const = 0; protected: - explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) - : emulated_cpu_frequency{emulated_cpu_frequency_}, - emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} + using NsRatio = std::nano; + using UsRatio = std::micro; + using MsRatio = std::milli; - u64 emulated_cpu_frequency; - u64 emulated_clock_frequency; - -private: - bool is_native; + using NsToUsRatio = std::ratio_divide; + using NsToMsRatio = std::ratio_divide; + using NsToCNTPCTRatio = std::ratio; }; -[[nodiscard]] std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr CreateOptimalClock(); -[[nodiscard]] std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr CreateStandardWallClock(); } // namespace Common diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..5d1eb0590 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -1,164 +1,45 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include -#include -#include - -#include "common/atomic_ops.h" -#include "common/steady_clock.h" #include "common/uint128.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" -#ifdef _MSC_VER -#include -#endif +namespace Common::X64 { -namespace Common { +NativeClock::NativeClock(u64 rdtsc_frequency_) + : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, + ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, + us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, + ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, + cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)} {} -#ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} -#else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; +std::chrono::nanoseconds NativeClock::GetTimeNS() const { + return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; } -#endif -template -static u64 RoundToNearest(u64 value) { - const auto mod = value % Nearest; - return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +std::chrono::microseconds NativeClock::GetTimeUS() const { + return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)}; } -u64 EstimateRDTSCFrequency() { - // Discard the first result measuring the rdtsc. - FencedRDTSC(); - std::this_thread::sleep_for(std::chrono::milliseconds{1}); - FencedRDTSC(); - - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 250 milliseconds. - std::this_thread::sleep_for(std::chrono::milliseconds{250}); - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast( - std::chrono::duration_cast(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - return RoundToNearest<1000>(tsc_freq); +std::chrono::milliseconds NativeClock::GetTimeMS() const { + return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)}; } -namespace X64 { -NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ - rtsc_frequency_} { - // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. - time_sync_thread = std::jthread{[this](std::stop_token token) { - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 10 seconds. - if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { - return; - } - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast( - std::chrono::duration_cast(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - rtsc_frequency = tsc_freq; - CalculateAndSetFactors(); - }}; - - time_point.inner.last_measure = FencedRDTSC(); - time_point.inner.accumulated_ticks = 0U; - CalculateAndSetFactors(); +u64 NativeClock::GetCNTPCT() const { + return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); } -u64 NativeClock::GetRTSC() { - TimePoint new_time_point{}; - TimePoint current_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - const u64 current_measure = FencedRDTSC(); - u64 diff = current_measure - current_time_point.inner.last_measure; - diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) - new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure - ? current_measure - : current_time_point.inner.last_measure; - new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - return new_time_point.inner.accumulated_ticks; +u64 NativeClock::GetHostTicksNow() const { + return FencedRDTSC(); } -void NativeClock::Pause(bool is_paused) { - if (!is_paused) { - TimePoint current_time_point{}; - TimePoint new_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - new_time_point.pack = current_time_point.pack; - new_time_point.inner.last_measure = FencedRDTSC(); - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - } -} - -std::chrono::nanoseconds NativeClock::GetTimeNS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; +u64 NativeClock::GetHostTicksElapsed() const { + return FencedRDTSC() - start_ticks; } -std::chrono::microseconds NativeClock::GetTimeUS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; +bool NativeClock::IsNative() const { + return true; } -std::chrono::milliseconds NativeClock::GetTimeMS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; -} - -u64 NativeClock::GetClockCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, clock_rtsc_factor); -} - -u64 NativeClock::GetCPUCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, cpu_rtsc_factor); -} - -void NativeClock::CalculateAndSetFactors() { - ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); - us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); - ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); - clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); - cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); -} - -} // namespace X64 - -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..d6f8626c1 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -3,58 +3,36 @@ #pragma once -#include "common/polyfill_thread.h" #include "common/wall_clock.h" -namespace Common { +namespace Common::X64 { -namespace X64 { class NativeClock final : public WallClock { public: - explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_); + explicit NativeClock(u64 rdtsc_frequency_); - std::chrono::nanoseconds GetTimeNS() override; + std::chrono::nanoseconds GetTimeNS() const override; - std::chrono::microseconds GetTimeUS() override; + std::chrono::microseconds GetTimeUS() const override; - std::chrono::milliseconds GetTimeMS() override; + std::chrono::milliseconds GetTimeMS() const override; - u64 GetClockCycles() override; + u64 GetCNTPCT() const override; - u64 GetCPUCycles() override; + u64 GetHostTicksNow() const override; - void Pause(bool is_paused) override; + u64 GetHostTicksElapsed() const override; -private: - u64 GetRTSC(); - - void CalculateAndSetFactors(); - - union alignas(16) TimePoint { - TimePoint() : pack{} {} - u128 pack{}; - struct Inner { - u64 last_measure{}; - u64 accumulated_ticks{}; - } inner; - }; - - TimePoint time_point; + bool IsNative() const override; - // factors - u64 clock_rtsc_factor{}; - u64 cpu_rtsc_factor{}; - u64 ns_rtsc_factor{}; - u64 us_rtsc_factor{}; - u64 ms_rtsc_factor{}; - - u64 rtsc_frequency; +private: + u64 start_ticks; + u64 rdtsc_frequency; - std::jthread time_sync_thread; + u64 ns_rdtsc_factor; + u64 us_rdtsc_factor; + u64 ms_rdtsc_factor; + u64 cntpct_rdtsc_factor; }; -} // namespace X64 - -u64 EstimateRDTSCFrequency(); -} // namespace Common +} // namespace Common::X64 -- cgit v1.2.3 From 907507886d755fa56099713c4b8f05bb640a8b7d Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sun, 28 May 2023 17:45:47 -0400 Subject: (wall, native)_clock: Add GetGPUTick Allows us to directly calculate the GPU tick without double conversion to and from the host clock tick. --- src/common/wall_clock.cpp | 8 ++++++-- src/common/wall_clock.h | 20 +++++++++++++++++++- src/common/x64/native_clock.cpp | 7 ++++++- src/common/x64/native_clock.h | 3 +++ src/core/core_timing.cpp | 7 +++++++ src/core/core_timing.h | 3 +++ src/video_core/gpu.cpp | 11 +++-------- 7 files changed, 47 insertions(+), 12 deletions(-) (limited to 'src/common/x64') diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index ad8db06b0..dc0dcbd68 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -32,6 +32,10 @@ public: return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } + u64 GetGPUTick() const override { + return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + } + u64 GetHostTicksNow() const override { return static_cast(SteadyClock::Now().time_since_epoch().count()); } @@ -52,12 +56,12 @@ std::unique_ptr CreateOptimalClock() { #ifdef ARCHITECTURE_x86_64 const auto& caps = GetCPUCaps(); - if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::CNTFRQ) { + if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) { return std::make_unique(caps.tsc_frequency); } else { // Fallback to StandardWallClock if the hardware TSC // - Is not invariant - // - Is not more precise than CNTFRQ + // - Is not more precise than GPUTickFreq return std::make_unique(); } #else diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 56c18ca25..fcfdd637c 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -13,7 +13,8 @@ namespace Common { class WallClock { public: - static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz + static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz + static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz virtual ~WallClock() = default; @@ -29,6 +30,9 @@ public: /// @returns The guest CNTPCT ticks since the construction of this clock. virtual u64 GetCNTPCT() const = 0; + /// @returns The guest GPU ticks since the construction of this clock. + virtual u64 GetGPUTick() const = 0; + /// @returns The raw host timer ticks since an indeterminate epoch. virtual u64 GetHostTicksNow() const = 0; @@ -46,6 +50,10 @@ public: return us * UsToCNTPCTRatio::num / UsToCNTPCTRatio::den; } + static inline u64 NSToGPUTick(u64 ns) { + return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + } + static inline u64 CNTPCTToNS(u64 cntpct) { return cntpct * NsToCNTPCTRatio::den / NsToCNTPCTRatio::num; } @@ -54,6 +62,14 @@ public: return cntpct * UsToCNTPCTRatio::den / UsToCNTPCTRatio::num; } + static inline u64 GPUTickToNS(u64 gpu_tick) { + return gpu_tick * NsToGPUTickRatio::den / NsToGPUTickRatio::num; + } + + static inline u64 CNTPCTToGPUTick(u64 cntpct) { + return cntpct * CNTPCTToGPUTickRatio::num / CNTPCTToGPUTickRatio::den; + } + protected: using NsRatio = std::nano; using UsRatio = std::micro; @@ -63,6 +79,8 @@ protected: using NsToMsRatio = std::ratio_divide; using NsToCNTPCTRatio = std::ratio; using UsToCNTPCTRatio = std::ratio; + using NsToGPUTickRatio = std::ratio; + using CNTPCTToGPUTickRatio = std::ratio; }; std::unique_ptr CreateOptimalClock(); diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 5d1eb0590..7d2a26bd9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -12,7 +12,8 @@ NativeClock::NativeClock(u64 rdtsc_frequency_) ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, - cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)} {} + cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)}, + gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {} std::chrono::nanoseconds NativeClock::GetTimeNS() const { return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; @@ -30,6 +31,10 @@ u64 NativeClock::GetCNTPCT() const { return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); } +u64 NativeClock::GetGPUTick() const { + return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor); +} + u64 NativeClock::GetHostTicksNow() const { return FencedRDTSC(); } diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index d6f8626c1..334415eff 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -19,6 +19,8 @@ public: u64 GetCNTPCT() const override; + u64 GetGPUTick() const override; + u64 GetHostTicksNow() const override; u64 GetHostTicksElapsed() const override; @@ -33,6 +35,7 @@ private: u64 us_rdtsc_factor; u64 ms_rdtsc_factor; u64 cntpct_rdtsc_factor; + u64 gputick_rdtsc_factor; }; } // namespace Common::X64 diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 9a1d5a69a..e57bca70a 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -197,6 +197,13 @@ u64 CoreTiming::GetClockTicks() const { return ticks; } +u64 CoreTiming::GetGPUTicks() const { + if (is_multicore) [[likely]] { + return clock->GetGPUTick(); + } + return Common::WallClock::CNTPCTToGPUTick(ticks); +} + std::optional CoreTiming::Advance() { std::scoped_lock lock{advance_lock, basic_lock}; global_timer = GetGlobalTimeNs().count(); diff --git a/src/core/core_timing.h b/src/core/core_timing.h index fdacdd94a..1873852c4 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -119,6 +119,9 @@ public: /// Returns the current CNTPCT tick value. u64 GetClockTicks() const; + /// Returns the current GPU tick value. + u64 GetGPUTicks() const; + /// Returns current time in microseconds. std::chrono::microseconds GetGlobalTimeUs() const; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 70762c51a..db385076d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -193,18 +193,13 @@ struct GPU::Impl { } [[nodiscard]] u64 GetTicks() const { - // This values were reversed engineered by fincs from NVN - // The GPU clock is 614.4 MHz - using NsToGPUTickRatio = std::ratio<614'400'000, std::nano::den>; - static_assert(NsToGPUTickRatio::num == 384 && NsToGPUTickRatio::den == 625); - - u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); + u64 gpu_tick = system.CoreTiming().GetGPUTicks(); if (Settings::values.use_fast_gpu_time.GetValue()) { - nanoseconds /= 256; + gpu_tick /= 256; } - return nanoseconds * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + return gpu_tick; } [[nodiscard]] bool IsAsync() const { -- cgit v1.2.3