diff options
Diffstat (limited to 'src/common')
| -rw-r--r-- | src/common/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/common/assert.h | 2 | ||||
| -rw-r--r-- | src/common/bit_field.h | 2 | ||||
| -rw-r--r-- | src/common/bit_set.h | 3 | ||||
| -rw-r--r-- | src/common/code_block.h | 6 | ||||
| -rw-r--r-- | src/common/common_funcs.h | 4 | ||||
| -rw-r--r-- | src/common/file_util.cpp | 58 | ||||
| -rw-r--r-- | src/common/file_util.h | 41 | ||||
| -rw-r--r-- | src/common/logging/backend.cpp | 1 | ||||
| -rw-r--r-- | src/common/logging/log.h | 3 | ||||
| -rw-r--r-- | src/common/microprofile.h | 4 | ||||
| -rw-r--r-- | src/common/microprofileui.h | 3 | ||||
| -rw-r--r-- | src/common/profiler.cpp | 82 | ||||
| -rw-r--r-- | src/common/profiler.h | 152 | ||||
| -rw-r--r-- | src/common/profiler_reporting.h | 27 | ||||
| -rw-r--r-- | src/common/swap.h | 68 | ||||
| -rw-r--r-- | src/common/thread.h | 46 | ||||
| -rw-r--r-- | src/common/x64/emitter.cpp | 28 | ||||
| -rw-r--r-- | src/common/x64/emitter.h | 4 |
19 files changed, 146 insertions, 389 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index c839ce173..aa6eee2a3 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -47,7 +47,6 @@ set(HEADERS microprofile.h microprofileui.h platform.h - profiler.h profiler_reporting.h scm_rev.h scope_exit.h diff --git a/src/common/assert.h b/src/common/assert.h index 6849778b7..cd9b819a9 100644 --- a/src/common/assert.h +++ b/src/common/assert.h @@ -39,6 +39,7 @@ static void assert_noinline_call(const Fn& fn) { }); } while (0) #define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!") +#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__) #ifdef _DEBUG #define DEBUG_ASSERT(_a_) ASSERT(_a_) @@ -49,3 +50,4 @@ static void assert_noinline_call(const Fn& fn) { #endif #define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!") +#define UNIMPLEMENTED_MSG(_a_, ...) ASSERT_MSG(false, _a_, __VA_ARGS__)
\ No newline at end of file diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 371eb17a1..4748999ed 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -186,5 +186,5 @@ private: #pragma pack() #if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) -static_assert(std::is_trivially_copyable<BitField<0, 1, u32>>::value, "BitField must be trivially copyable"); +static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value, "BitField must be trivially copyable"); #endif diff --git a/src/common/bit_set.h b/src/common/bit_set.h index 85f91e786..7f5de8df2 100644 --- a/src/common/bit_set.h +++ b/src/common/bit_set.h @@ -7,6 +7,7 @@ #include <intrin.h> #endif #include <initializer_list> +#include <new> #include <type_traits> #include "common/common_types.h" @@ -186,4 +187,4 @@ public: typedef Common::BitSet<u8> BitSet8; typedef Common::BitSet<u16> BitSet16; typedef Common::BitSet<u32> BitSet32; -typedef Common::BitSet<u64> BitSet64;
\ No newline at end of file +typedef Common::BitSet<u64> BitSet64; diff --git a/src/common/code_block.h b/src/common/code_block.h index 9ef7296d3..2fa4a0090 100644 --- a/src/common/code_block.h +++ b/src/common/code_block.h @@ -4,8 +4,10 @@ #pragma once -#include "common_types.h" -#include "memory_util.h" +#include <cstddef> + +#include "common/common_types.h" +#include "common/memory_util.h" // Everything that needs to generate code should inherit from this. // You get memory management for free, plus, you can use all emitter functions without diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index aa6aff7b9..ab3515683 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -4,6 +4,10 @@ #pragma once +#if !defined(ARCHITECTURE_x86_64) && !defined(_M_ARM) +#include <cstdlib> // for exit +#endif + #include "common_types.h" #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp index 9ada09f8a..6e2867658 100644 --- a/src/common/file_util.cpp +++ b/src/common/file_util.cpp @@ -69,9 +69,10 @@ static void StripTailDirSlashes(std::string &fname) { if (fname.length() > 1) { - size_t i = fname.length() - 1; - while (fname[i] == DIR_SEP_CHR) - fname[i--] = '\0'; + size_t i = fname.length(); + while (i > 0 && fname[i - 1] == DIR_SEP_CHR) + --i; + fname.resize(i); } return; } @@ -85,6 +86,10 @@ bool Exists(const std::string &filename) StripTailDirSlashes(copy); #ifdef _WIN32 + // Windows needs a slash to identify a driver root + if (copy.size() != 0 && copy.back() == ':') + copy += DIR_SEP_CHR; + int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info); #else int result = stat64(copy.c_str(), &file_info); @@ -102,6 +107,10 @@ bool IsDirectory(const std::string &filename) StripTailDirSlashes(copy); #ifdef _WIN32 + // Windows needs a slash to identify a driver root + if (copy.size() != 0 && copy.back() == ':') + copy += DIR_SEP_CHR; + int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info); #else int result = stat64(copy.c_str(), &file_info); @@ -824,13 +833,12 @@ size_t WriteStringToFile(bool text_file, const std::string &str, const char *fil size_t ReadFileToString(bool text_file, const char *filename, std::string &str) { - FileUtil::IOFile file(filename, text_file ? "r" : "rb"); - auto const f = file.GetHandle(); + IOFile file(filename, text_file ? "r" : "rb"); - if (!f) + if (!file) return false; - str.resize(static_cast<u32>(GetSize(f))); + str.resize(static_cast<u32>(file.GetSize())); return file.ReadArray(&str[0], str.size()); } @@ -877,15 +885,10 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam } IOFile::IOFile() - : m_file(nullptr), m_good(true) -{} - -IOFile::IOFile(std::FILE* file) - : m_file(file), m_good(true) -{} +{ +} IOFile::IOFile(const std::string& filename, const char openmode[]) - : m_file(nullptr), m_good(true) { Open(filename, openmode); } @@ -896,7 +899,6 @@ IOFile::~IOFile() } IOFile::IOFile(IOFile&& other) - : m_file(nullptr), m_good(true) { Swap(other); } @@ -935,26 +937,12 @@ bool IOFile::Close() return m_good; } -std::FILE* IOFile::ReleaseHandle() -{ - std::FILE* const ret = m_file; - m_file = nullptr; - return ret; -} - -void IOFile::SetHandle(std::FILE* file) -{ - Close(); - Clear(); - m_file = file; -} - -u64 IOFile::GetSize() +u64 IOFile::GetSize() const { if (IsOpen()) return FileUtil::GetSize(m_file); - else - return 0; + + return 0; } bool IOFile::Seek(s64 off, int origin) @@ -965,12 +953,12 @@ bool IOFile::Seek(s64 off, int origin) return m_good; } -u64 IOFile::Tell() +u64 IOFile::Tell() const { if (IsOpen()) return ftello(m_file); - else - return -1; + + return -1; } bool IOFile::Flush() diff --git a/src/common/file_util.h b/src/common/file_util.h index a85121aa6..c6a8694ce 100644 --- a/src/common/file_util.h +++ b/src/common/file_util.h @@ -7,13 +7,17 @@ #include <array> #include <fstream> #include <functional> -#include <cstddef> #include <cstdio> #include <string> +#include <type_traits> #include <vector> #include "common/common_types.h" +#ifdef _MSC_VER +#include "common/string_util.h" +#endif + // User directory indices for GetUserPath enum { D_USER_IDX, @@ -172,7 +176,6 @@ class IOFile : public NonCopyable { public: IOFile(); - IOFile(std::FILE* file); IOFile(const std::string& filename, const char openmode[]); ~IOFile(); @@ -188,6 +191,11 @@ public: template <typename T> size_t ReadArray(T* data, size_t length) { + static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); +#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) + static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); +#endif + if (!IsOpen()) { m_good = false; return -1; @@ -203,9 +211,10 @@ public: template <typename T> size_t WriteArray(const T* data, size_t length) { - static_assert(std::is_standard_layout<T>::value, "Given array does not consist of standard layout objects"); - // TODO: gcc 4.8 does not support is_trivially_copyable, but we really should check for it here. - //static_assert(std::is_trivially_copyable<T>::value, "Given array does not consist of trivially copyable objects"); + static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); +#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) + static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); +#endif if (!IsOpen()) { m_good = false; @@ -235,32 +244,24 @@ public: return WriteArray(&object, 1); } - bool IsOpen() { return nullptr != m_file; } + bool IsOpen() const { return nullptr != m_file; } // m_good is set to false when a read, write or other function fails - bool IsGood() { return m_good; } - operator void*() { return m_good ? m_file : nullptr; } - - std::FILE* ReleaseHandle(); - - std::FILE* GetHandle() { return m_file; } - - void SetHandle(std::FILE* file); + bool IsGood() const { return m_good; } + explicit operator bool() const { return IsGood(); } bool Seek(s64 off, int origin); - u64 Tell(); - u64 GetSize(); + u64 Tell() const; + u64 GetSize() const; bool Resize(u64 size); bool Flush(); // clear error state void Clear() { m_good = true; std::clearerr(m_file); } - std::FILE* m_file; - bool m_good; private: - IOFile(IOFile&); - IOFile& operator=(IOFile& other); + std::FILE* m_file = nullptr; + bool m_good = true; }; } // namespace diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 3d39f94d5..d7008fc66 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -65,6 +65,7 @@ namespace Log { SUB(Render, OpenGL) \ CLS(Audio) \ SUB(Audio, DSP) \ + SUB(Audio, Sink) \ CLS(Loader) // GetClassName is a macro defined by Windows.h, grrr... diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 521362317..c6910b1c7 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -78,8 +78,9 @@ enum class Class : ClassType { Render, ///< Emulator video output and hardware acceleration Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend - Audio, ///< Emulator audio output + Audio, ///< Audio emulation Audio_DSP, ///< The HLE implementation of the DSP + Audio_Sink, ///< Emulator audio output backend Loader, ///< ROM loader Count ///< Total number of logging classes diff --git a/src/common/microprofile.h b/src/common/microprofile.h index d3b6cb97c..ef312c6e1 100644 --- a/src/common/microprofile.h +++ b/src/common/microprofile.h @@ -4,6 +4,10 @@ #pragma once +// Uncomment this to disable microprofile. This will get you cleaner profiles when using +// external sampling profilers like "Very Sleepy", and will improve performance somewhat. +// #define MICROPROFILE_ENABLED 0 + // Customized Citra settings. // This file wraps the MicroProfile header so that these are consistent everywhere. #define MICROPROFILE_WEBSERVER 0 diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h index 97c369bd9..41abe6b75 100644 --- a/src/common/microprofileui.h +++ b/src/common/microprofileui.h @@ -13,4 +13,7 @@ #define MICROPROFILE_HELP_ALT "Right-Click" #define MICROPROFILE_HELP_MOD "Ctrl" +// This isn't included by microprofileui.h :( +#include <cstdlib> // For std::abs + #include <microprofileui.h> diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp index 7792edd2f..49eb3f40c 100644 --- a/src/common/profiler.cpp +++ b/src/common/profiler.cpp @@ -7,71 +7,16 @@ #include <vector> #include "common/assert.h" -#include "common/profiler.h" #include "common/profiler_reporting.h" #include "common/synchronized_wrapper.h" -#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013. - #define WIN32_LEAN_AND_MEAN - #include <Windows.h> // For QueryPerformanceCounter/Frequency -#endif - namespace Common { namespace Profiling { -#if ENABLE_PROFILING -thread_local Timer* Timer::current_timer = nullptr; -#endif - -#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013 -QPCClock::time_point QPCClock::now() { - static LARGE_INTEGER freq; - // Use this dummy local static to ensure this gets initialized once. - static BOOL dummy = QueryPerformanceFrequency(&freq); - - LARGE_INTEGER ticks; - QueryPerformanceCounter(&ticks); - - // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The - // correct way to approach this would be to just return ticks as a time_point and then subtract - // and do this conversion when creating a duration from two time_points, however, as far as I - // could tell the C++ requirements for these types are incompatible with this approach. - return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart)); -} -#endif - -TimingCategory::TimingCategory(const char* name, TimingCategory* parent) - : accumulated_duration(0) { - - ProfilingManager& manager = GetProfilingManager(); - category_id = manager.RegisterTimingCategory(this, name); - if (parent != nullptr) - manager.SetTimingCategoryParent(category_id, parent->category_id); -} - ProfilingManager::ProfilingManager() : last_frame_end(Clock::now()), this_frame_start(Clock::now()) { } -unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) { - TimingCategoryInfo info; - info.category = category; - info.name = name; - info.parent = TimingCategoryInfo::NO_PARENT; - - unsigned int id = (unsigned int)timing_categories.size(); - timing_categories.push_back(std::move(info)); - - return id; -} - -void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) { - ASSERT(category < timing_categories.size()); - ASSERT(parent < timing_categories.size()); - - timing_categories[category].parent = parent; -} - void ProfilingManager::BeginFrame() { this_frame_start = Clock::now(); } @@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() { results.interframe_time = now - last_frame_end; results.frame_time = now - this_frame_start; - results.time_per_category.resize(timing_categories.size()); - for (size_t i = 0; i < timing_categories.size(); ++i) { - results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime(); - } - last_frame_end = now; } @@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() { window_size = cursor = 0; } -void TimingResultsAggregator::SetNumberOfCategories(size_t n) { - size_t old_size = times_per_category.size(); - if (n == old_size) - return; - - times_per_category.resize(n); - - for (size_t i = old_size; i < n; ++i) { - times_per_category[i].resize(max_window_size, Duration::zero()); - } -} - void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) { - SetNumberOfCategories(frame_result.time_per_category.size()); - interframe_times[cursor] = frame_result.interframe_time; frame_times[cursor] = frame_result.frame_time; - for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) { - times_per_category[i][cursor] = frame_result.time_per_category[i]; - } ++cursor; if (cursor == max_window_size) @@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const { result.fps = 0.0f; } - result.time_per_category.resize(times_per_category.size()); - for (size_t i = 0; i < times_per_category.size(); ++i) { - result.time_per_category[i] = AggregateField(times_per_category[i], window_size); - } - return result; } diff --git a/src/common/profiler.h b/src/common/profiler.h deleted file mode 100644 index 3e967b4bc..000000000 --- a/src/common/profiler.h +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <atomic> -#include <chrono> - -#include "common/assert.h" -#include "common/thread.h" - -namespace Common { -namespace Profiling { - -// If this is defined to 0, it turns all Timers into no-ops. -#ifndef ENABLE_PROFILING -#define ENABLE_PROFILING 1 -#endif - -#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013 -// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad -// precision. We manually implement a clock based on QPC to get good results. - -struct QPCClock { - using duration = std::chrono::microseconds; - using time_point = std::chrono::time_point<QPCClock>; - using rep = duration::rep; - using period = duration::period; - static const bool is_steady = false; - - static time_point now(); -}; - -using Clock = QPCClock; -#else -using Clock = std::chrono::high_resolution_clock; -#endif - -using Duration = Clock::duration; - -/** - * Represents a timing category that measured time can be accounted towards. Should be declared as a - * global variable and passed to Timers. - */ -class TimingCategory final { -public: - TimingCategory(const char* name, TimingCategory* parent = nullptr); - - unsigned int GetCategoryId() const { - return category_id; - } - - /// Adds some time to this category. Can safely be called from multiple threads at the same time. - void AddTime(Duration amount) { - std::atomic_fetch_add_explicit( - &accumulated_duration, amount.count(), - std::memory_order_relaxed); - } - - /** - * Atomically retrieves the accumulated measured time for this category and resets the counter - * to zero. Can be safely called concurrently with AddTime. - */ - Duration GetAccumulatedTime() { - return Duration(std::atomic_exchange_explicit( - &accumulated_duration, (Duration::rep)0, - std::memory_order_relaxed)); - } - -private: - unsigned int category_id; - std::atomic<Duration::rep> accumulated_duration; -}; - -/** - * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given - * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be - * appropriately paired. - * - * When a Timer is started, it automatically pauses a previously running timer on the same thread, - * which is resumed when it is stopped. As such, no special action needs to be taken to avoid - * double-accounting of time on two categories. - */ -class Timer { -public: - Timer(TimingCategory& category) : category(category) { - } - - void Start() { -#if ENABLE_PROFILING - ASSERT(!running); - previous_timer = current_timer; - current_timer = this; - if (previous_timer != nullptr) - previous_timer->StopTiming(); - - StartTiming(); -#endif - } - - void Stop() { -#if ENABLE_PROFILING - ASSERT(running); - StopTiming(); - - if (previous_timer != nullptr) - previous_timer->StartTiming(); - current_timer = previous_timer; -#endif - } - -private: -#if ENABLE_PROFILING - void StartTiming() { - start = Clock::now(); - running = true; - } - - void StopTiming() { - auto duration = Clock::now() - start; - running = false; - category.AddTime(std::chrono::duration_cast<Duration>(duration)); - } - - Clock::time_point start; - bool running = false; - - Timer* previous_timer; - static thread_local Timer* current_timer; -#endif - - TimingCategory& category; -}; - -/** - * A Timer that automatically starts timing when created and stops at the end of the scope. Should - * be used in the majority of cases. - */ -class ScopeTimer : public Timer { -public: - ScopeTimer(TimingCategory& category) : Timer(category) { - Start(); - } - - ~ScopeTimer() { - Stop(); - } -}; - -} // namespace Profiling -} // namespace Common diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h index df98e05b7..fa1ac883f 100644 --- a/src/common/profiler_reporting.h +++ b/src/common/profiler_reporting.h @@ -4,22 +4,17 @@ #pragma once +#include <chrono> #include <cstddef> #include <vector> -#include "common/profiler.h" #include "common/synchronized_wrapper.h" namespace Common { namespace Profiling { -struct TimingCategoryInfo { - static const unsigned int NO_PARENT = -1; - - TimingCategory* category; - const char* name; - unsigned int parent; -}; +using Clock = std::chrono::high_resolution_clock; +using Duration = Clock::duration; struct ProfilingFrameResult { /// Time since the last delivered frame @@ -27,22 +22,12 @@ struct ProfilingFrameResult { /// Time spent processing a frame, excluding VSync Duration frame_time; - - /// Total amount of time spent inside each category in this frame. Indexed by the category id - std::vector<Duration> time_per_category; }; class ProfilingManager final { public: ProfilingManager(); - unsigned int RegisterTimingCategory(TimingCategory* category, const char* name); - void SetTimingCategoryParent(unsigned int category, unsigned int parent); - - const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const { - return timing_categories; - } - /// This should be called after swapping screen buffers. void BeginFrame(); /// This should be called before swapping screen buffers. @@ -54,7 +39,6 @@ public: } private: - std::vector<TimingCategoryInfo> timing_categories; Clock::time_point last_frame_end; Clock::time_point this_frame_start; @@ -73,9 +57,6 @@ struct AggregatedFrameResult { AggregatedDuration frame_time; float fps; - - /// Total amount of time spent inside each category in this frame. Indexed by the category id - std::vector<AggregatedDuration> time_per_category; }; class TimingResultsAggregator final { @@ -83,7 +64,6 @@ public: TimingResultsAggregator(size_t window_size); void Clear(); - void SetNumberOfCategories(size_t n); void AddFrame(const ProfilingFrameResult& frame_result); @@ -95,7 +75,6 @@ public: std::vector<Duration> interframe_times; std::vector<Duration> frame_times; - std::vector<std::vector<Duration>> times_per_category; }; ProfilingManager& GetProfilingManager(); diff --git a/src/common/swap.h b/src/common/swap.h index a7c37bc44..1749bd7a4 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -25,6 +25,8 @@ #include <sys/endian.h> #endif +#include <cstring> + #include "common/common_types.h" // GCC 4.6+ @@ -58,9 +60,6 @@ namespace Common { -inline u8 swap8(u8 _data) {return _data;} -inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];} - #ifdef _MSC_VER inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} @@ -92,52 +91,29 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3 #endif inline float swapf(float f) { - union { - float f; - unsigned int u32; - } dat1, dat2; - - dat1.f = f; - dat2.u32 = swap32(dat1.u32); + static_assert(sizeof(u32) == sizeof(float), + "float must be the same size as uint32_t."); - return dat2.f; -} - -inline double swapd(double f) { - union { - double f; - unsigned long long u64; - } dat1, dat2; + u32 value; + std::memcpy(&value, &f, sizeof(u32)); - dat1.f = f; - dat2.u64 = swap64(dat1.u64); + value = swap32(value); + std::memcpy(&f, &value, sizeof(u32)); - return dat2.f; + return f; } -inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} -inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} -inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} - -template <int count> -void swap(u8*); +inline double swapd(double f) { + static_assert(sizeof(u64) == sizeof(double), + "double must be the same size as uint64_t."); -template <> -inline void swap<1>(u8* data) { } + u64 value; + std::memcpy(&value, &f, sizeof(u64)); -template <> -inline void swap<2>(u8* data) { - *reinterpret_cast<u16*>(data) = swap16(data); -} - -template <> -inline void swap<4>(u8* data) { - *reinterpret_cast<u32*>(data) = swap32(data); -} + value = swap64(value); + std::memcpy(&f, &value, sizeof(u64)); -template <> -inline void swap<8>(u8* data) { - *reinterpret_cast<u64*>(data) = swap64(data); + return f; } } // Namespace Common @@ -534,35 +510,35 @@ bool operator==(const S &p, const swap_struct_t<T, F> v) { template <typename T> struct swap_64_t { static T swap(T x) { - return (T)Common::swap64(*(u64 *)&x); + return static_cast<T>(Common::swap64(x)); } }; template <typename T> struct swap_32_t { static T swap(T x) { - return (T)Common::swap32(*(u32 *)&x); + return static_cast<T>(Common::swap32(x)); } }; template <typename T> struct swap_16_t { static T swap(T x) { - return (T)Common::swap16(*(u16 *)&x); + return static_cast<T>(Common::swap16(x)); } }; template <typename T> struct swap_float_t { static T swap(T x) { - return (T)Common::swapf(*(float *)&x); + return static_cast<T>(Common::swapf(x)); } }; template <typename T> struct swap_double_t { static T swap(T x) { - return (T)Common::swapd(*(double *)&x); + return static_cast<T>(Common::swapd(x)); } }; diff --git a/src/common/thread.h b/src/common/thread.h index 8255ee6d3..bbfa8befa 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -30,8 +30,7 @@ # endif #endif -namespace Common -{ +namespace Common { int CurrentThreadId(); @@ -43,55 +42,55 @@ public: Event() : is_set(false) {} void Set() { - std::lock_guard<std::mutex> lk(m_mutex); + std::lock_guard<std::mutex> lk(mutex); if (!is_set) { is_set = true; - m_condvar.notify_one(); + condvar.notify_one(); } } void Wait() { - std::unique_lock<std::mutex> lk(m_mutex); - m_condvar.wait(lk, [&]{ return is_set; }); + std::unique_lock<std::mutex> lk(mutex); + condvar.wait(lk, [&]{ return is_set; }); is_set = false; } void Reset() { - std::unique_lock<std::mutex> lk(m_mutex); + std::unique_lock<std::mutex> lk(mutex); // no other action required, since wait loops on the predicate and any lingering signal will get cleared on the first iteration is_set = false; } private: bool is_set; - std::condition_variable m_condvar; - std::mutex m_mutex; + std::condition_variable condvar; + std::mutex mutex; }; class Barrier { public: - Barrier(size_t count) : m_count(count), m_waiting(0) {} + explicit Barrier(size_t count_) : count(count_), waiting(0), generation(0) {} /// Blocks until all "count" threads have called Sync() void Sync() { - std::unique_lock<std::mutex> lk(m_mutex); + std::unique_lock<std::mutex> lk(mutex); + const size_t current_generation = generation; - // TODO: broken when next round of Sync()s - // is entered before all waiting threads return from the notify_all - - if (++m_waiting == m_count) { - m_waiting = 0; - m_condvar.notify_all(); + if (++waiting == count) { + generation++; + waiting = 0; + condvar.notify_all(); } else { - m_condvar.wait(lk, [&]{ return m_waiting == 0; }); + condvar.wait(lk, [this, current_generation]{ return current_generation != generation; }); } } private: - std::condition_variable m_condvar; - std::mutex m_mutex; - const size_t m_count; - size_t m_waiting; + std::condition_variable condvar; + std::mutex mutex; + const size_t count; + size_t waiting; + size_t generation; // Incremented once each time the barrier is used }; void SleepCurrentThread(int ms); @@ -100,8 +99,7 @@ void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms // Use this function during a spin-wait to make the current thread // relax while another thread is working. This may be more efficient // than using events because event functions use kernel calls. -inline void YieldCPU() -{ +inline void YieldCPU() { std::this_thread::yield(); } diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 1dcf2416c..5662f7f86 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp @@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr) Write32(u32(distance)); } +FixupBranch XEmitter::CALL() +{ + FixupBranch branch; + branch.type = 1; + branch.ptr = code + 5; + + Write8(0xE8); + Write32(0); + + return branch; +} + FixupBranch XEmitter::J(bool force5bytes) { FixupBranch branch; @@ -531,6 +543,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch) } } +void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) +{ + if (branch.type == 0) + { + s64 distance = (s64)(target - branch.ptr); + ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); + branch.ptr[-1] = (u8)(s8)distance; + } + else if (branch.type == 1) + { + s64 distance = (s64)(target - branch.ptr); + ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); + ((s32*)branch.ptr)[-1] = (s32)distance; + } +} + //Single byte opcodes //There is no PUSHAD/POPAD in 64-bit mode. void XEmitter::INT3() {Write8(0xCC);} diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index 7c6548fb5..60a77dfe1 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h @@ -17,6 +17,8 @@ #pragma once +#include <cstddef> + #include "common/assert.h" #include "common/bit_set.h" #include "common/common_types.h" @@ -425,12 +427,14 @@ public: #undef CALL #endif void CALL(const void* fnptr); + FixupBranch CALL(); void CALLptr(OpArg arg); FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); void SetJumpTarget(const FixupBranch& branch); + void SetJumpTarget(const FixupBranch& branch, const u8* target); void SETcc(CCFlags flag, OpArg dest); // Note: CMOV brings small if any benefit on current cpus. |
