From cd1fbfcf1b70e365d81480ec0f56db19ed02454f Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Thu, 5 Feb 2015 14:53:25 -0200 Subject: Add profiling infrastructure and widget --- src/common/profiler.h | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 src/common/profiler.h (limited to 'src/common/profiler.h') diff --git a/src/common/profiler.h b/src/common/profiler.h new file mode 100644 index 000000000..53c4f6eaf --- /dev/null +++ b/src/common/profiler.h @@ -0,0 +1,134 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> +#include <chrono> + +#include "common/assert.h" +#include "common/thread.h" + +namespace Common { +namespace Profiling { + +// If this is defined to 0, it turns all Timers into no-ops. +#ifndef ENABLE_PROFILING +#define ENABLE_PROFILING 1 +#endif + +using Duration = std::chrono::nanoseconds; +using Clock = std::chrono::high_resolution_clock; + +/** + * Represents a timing category that measured time can be accounted towards. Should be declared as a + * global variable and passed to Timers. + */ +class TimingCategory final { +public: + TimingCategory(const char* name, TimingCategory* parent = nullptr); + + unsigned int GetCategoryId() const { + return category_id; + } + + /// Adds some time to this category. Can safely be called from multiple threads at the same time. + void AddTime(Duration amount) { + std::atomic_fetch_add_explicit( + &accumulated_duration, amount.count(), + std::memory_order_relaxed); + } + + /** + * Atomically retrieves the accumulated measured time for this category and resets the counter + * to zero. Can be safely called concurrently with AddTime. 
+ */ + Duration GetAccumulatedTime() { + return Duration(std::atomic_exchange_explicit( + &accumulated_duration, (Duration::rep)0, + std::memory_order_relaxed)); + } + +private: + unsigned int category_id; + std::atomic<Duration::rep> accumulated_duration; +}; + +/** + * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given + * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be + * appropriately paired. + * + * When a Timer is started, it automatically pauses a previously running timer on the same thread, + * which is resumed when it is stopped. As such, no special action needs to be taken to avoid + * double-accounting of time on two categories. + */ +class Timer { +public: + Timer(TimingCategory& category) : category(category) { + } + + void Start() { +#if ENABLE_PROFILING + ASSERT(!running); + previous_timer = current_timer; + current_timer = this; + if (previous_timer != nullptr) + previous_timer->StopTiming(); + + StartTiming(); +#endif + } + + void Stop() { +#if ENABLE_PROFILING + ASSERT(running); + StopTiming(); + + if (previous_timer != nullptr) + previous_timer->StartTiming(); + current_timer = previous_timer; +#endif + } + +private: +#if ENABLE_PROFILING + void StartTiming() { + start = Clock::now(); + running = true; + } + + void StopTiming() { + auto duration = Clock::now() - start; + running = false; + category.AddTime(std::chrono::duration_cast<Duration>(duration)); + } + + Clock::time_point start; + bool running = false; + + Timer* previous_timer; + static thread_local Timer* current_timer; +#endif + + TimingCategory& category; +}; + +/** + * A Timer that automatically starts timing when created and stops at the end of the scope. Should + * be used in the majority of cases. 
+ */ +class ScopeTimer : public Timer { +public: + ScopeTimer(TimingCategory& category) : Timer(category) { + Start(); + } + + ~ScopeTimer() { + Stop(); + } +}; + +} // namespace Profiling +} // namespace Common -- cgit v1.2.3 From dc8a3f8bc842df1b3eeeb5a283556ac644ab3183 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 15 Feb 2015 15:49:27 -0200 Subject: Profiler: Implement QPCClock to get better precision on Win32 MSVC 2013 (at least) doesn't use QueryPerformanceCounter to implement std::chrono::high_resolution_clock, so it has bad precision. Manually implementing our own clock type using it works around this for now. --- src/common/profiler.cpp | 23 +++++++++++++++++++++++ src/common/profiler.h | 20 +++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'src/common/profiler.h') diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp index c37546af0..65c3df167 100644 --- a/src/common/profiler.cpp +++ b/src/common/profiler.cpp @@ -6,6 +6,12 @@ #include "common/profiler_reporting.h" #include "common/assert.h" +#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013. +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#include <Windows.h> // For QueryPerformanceCounter/Frequency +#endif + namespace Common { namespace Profiling { @@ -13,6 +19,23 @@ namespace Profiling { thread_local Timer* Timer::current_timer = nullptr; #endif +#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013 +QPCClock::time_point QPCClock::now() { + static LARGE_INTEGER freq; + // Use this dummy local static to ensure this gets initialized once. + static BOOL dummy = QueryPerformanceFrequency(&freq); + + LARGE_INTEGER ticks; + QueryPerformanceCounter(&ticks); + + // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. 
The + // correct way to approach this would be to just return ticks as a time_point and then subtract + // and do this conversion when creating a duration from two time_points, however, as far as I + // could tell the C++ requirements for these types are incompatible with this approach. + return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart)); +} +#endif + TimingCategory::TimingCategory(const char* name, TimingCategory* parent) : accumulated_duration(0) { diff --git a/src/common/profiler.h b/src/common/profiler.h index 53c4f6eaf..3e967b4bc 100644 --- a/src/common/profiler.h +++ b/src/common/profiler.h @@ -18,8 +18,26 @@ namespace Profiling { #define ENABLE_PROFILING 1 #endif -using Duration = std::chrono::nanoseconds; +#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013 +// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad +// precision. We manually implement a clock based on QPC to get good results. + +struct QPCClock { + using duration = std::chrono::microseconds; + using time_point = std::chrono::time_point<QPCClock>; + using rep = duration::rep; + using period = duration::period; + static const bool is_steady = false; + + static time_point now(); +}; + +using Clock = QPCClock; +#else using Clock = std::chrono::high_resolution_clock; +#endif + +using Duration = Clock::duration; /** * Represents a timing category that measured time can be accounted towards. Should be declared as a -- cgit v1.2.3