From b159b8b7aaa1187117336bdd80c49e36dcf3ee7b Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 29 Nov 2021 04:02:55 +0100 Subject: [PATCH] early-access version 2254 --- README.md | 2 +- src/common/thread.cpp | 12 +-- src/common/thread.h | 1 - src/common/uint128.h | 5 -- src/common/x64/native_clock.cpp | 41 +++++----- src/common/x64/native_clock.h | 13 ++-- src/core/core_timing.cpp | 132 +++++++++++--------------------- src/core/core_timing.h | 25 +++--- src/tests/core/core_timing.cpp | 4 +- 9 files changed, 89 insertions(+), 146 deletions(-) diff --git a/README.md b/README.md index 29d48244b..555a6f140 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2253. +This is the source code for early-access 2254. ## Legal Notice diff --git a/src/common/thread.cpp b/src/common/thread.cpp index f07ba83da..946a1114d 100755 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -47,9 +47,6 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { case ThreadPriority::VeryHigh: windows_priority = THREAD_PRIORITY_HIGHEST; break; - case ThreadPriority::Critical: - windows_priority = THREAD_PRIORITY_TIME_CRITICAL; - break; default: windows_priority = THREAD_PRIORITY_NORMAL; break; @@ -62,10 +59,9 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { void SetCurrentThreadPriority(ThreadPriority new_priority) { pthread_t this_thread = pthread_self(); - const auto scheduling_type = SCHED_OTHER; - s32 max_prio = sched_get_priority_max(scheduling_type); - s32 min_prio = sched_get_priority_min(scheduling_type); - u32 level = std::max(static_cast(new_priority) + 1, 4U); + s32 max_prio = sched_get_priority_max(SCHED_OTHER); + s32 min_prio = sched_get_priority_min(SCHED_OTHER); + u32 level = static_cast(new_priority) + 1; struct sched_param params; if (max_prio > min_prio) { @@ -74,7 +70,7 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { params.sched_priority = 
min_prio - ((min_prio - max_prio) * level) / 4; } - pthread_setschedparam(this_thread, scheduling_type, &params); + pthread_setschedparam(this_thread, SCHED_OTHER, &params); } #endif diff --git a/src/common/thread.h b/src/common/thread.h index fc1a53716..a8c17c71a 100755 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -92,7 +92,6 @@ enum class ThreadPriority : u32 { Normal = 1, High = 2, VeryHigh = 3, - Critical = 4, }; void SetCurrentThreadPriority(ThreadPriority new_priority); diff --git a/src/common/uint128.h b/src/common/uint128.h index 993d76c5b..4780b2f9d 100755 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -31,10 +31,6 @@ namespace Common { #else return _udiv128(r[1], r[0], d, &remainder); #endif -#else -#ifdef __SIZEOF_INT128__ - const auto product = static_cast(a) * static_cast(b); - return static_cast(product / d); #else const u64 diva = a / d; const u64 moda = a % d; @@ -42,7 +38,6 @@ namespace Common { const u64 modb = b % d; return diva * b + moda * divb + moda * modb / d; #endif -#endif } // This function multiplies 2 u64 values and produces a u128 value; diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 643e0bc1d..87de40624 100755 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -44,14 +44,12 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen u64 rtsc_frequency_) : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ rtsc_frequency_} { - TimePoint new_time_point{}; _mm_mfence(); - new_time_point.last_measure = __rdtsc(); - new_time_point.accumulated_ticks = 0U; - time_point.store(new_time_point); - ns_rtsc_factor = GetFixedPoint64Factor(1000'000'000ULL, rtsc_frequency); - us_rtsc_factor = GetFixedPoint64Factor(1000'000ULL, rtsc_frequency); - ms_rtsc_factor = GetFixedPoint64Factor(1000ULL, rtsc_frequency); + time_point.inner.last_measure = __rdtsc(); + time_point.inner.accumulated_ticks = 0U; + ns_rtsc_factor = 
GetFixedPoint64Factor(1000000000, rtsc_frequency); + us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); + ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); } @@ -60,19 +58,19 @@ u64 NativeClock::GetRTSC() { TimePoint new_time_point{}; TimePoint current_time_point{}; do { - current_time_point = time_point.load(std::memory_order_acquire); + current_time_point.pack = time_point.pack; _mm_mfence(); const u64 current_measure = __rdtsc(); - u64 diff = current_measure - current_time_point.last_measure; + u64 diff = current_measure - current_time_point.inner.last_measure; diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) - new_time_point.last_measure = current_measure > current_time_point.last_measure - ? current_measure - : current_time_point.last_measure; - new_time_point.accumulated_ticks = current_time_point.accumulated_ticks + diff; - } while (!time_point.compare_exchange_weak( - current_time_point, new_time_point, std::memory_order_release, std::memory_order_relaxed)); + new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure + ? 
current_measure + : current_time_point.inner.last_measure; + new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; + } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, + current_time_point.pack)); /// The clock cannot be more precise than the guest timer, remove the lower bits - return new_time_point.accumulated_ticks & inaccuracy_mask; + return new_time_point.inner.accumulated_ticks & inaccuracy_mask; } void NativeClock::Pause(bool is_paused) { @@ -80,13 +78,12 @@ void NativeClock::Pause(bool is_paused) { TimePoint current_time_point{}; TimePoint new_time_point{}; do { - current_time_point = time_point.load(std::memory_order_acquire); - new_time_point = current_time_point; + current_time_point.pack = time_point.pack; + new_time_point.pack = current_time_point.pack; _mm_mfence(); - new_time_point.last_measure = __rdtsc(); - } while (!time_point.compare_exchange_weak(current_time_point, new_time_point, - std::memory_order_release, - std::memory_order_relaxed)); + new_time_point.inner.last_measure = __rdtsc(); + } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, + current_time_point.pack)); } } diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 97f5d0192..7cbd400d2 100755 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include "common/wall_clock.h" @@ -32,9 +31,13 @@ public: private: u64 GetRTSC(); - struct alignas(16) TimePoint { - u64 last_measure{}; - u64 accumulated_ticks{}; + union alignas(16) TimePoint { + TimePoint() : pack{} {} + u128 pack{}; + struct Inner { + u64 last_measure{}; + u64 accumulated_ticks{}; + } inner; }; /// value used to reduce the native clocks accuracy as some apss rely on @@ -42,7 +45,7 @@ private: /// be higher. 
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1); - std::atomic time_point; + TimePoint time_point; // factors u64 clock_rtsc_factor{}; u64 cpu_rtsc_factor{}; diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 4cc75e4c5..c2f0f609f 100755 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -8,7 +8,6 @@ #include #include "common/microprofile.h" -#include "common/thread.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hardware_properties.h" @@ -47,7 +46,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) { constexpr char name[] = "yuzu:HostTiming"; MicroProfileOnThreadCreate(name); Common::SetCurrentThreadName(name); - Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical); + Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); instance.on_thread_init(); instance.ThreadLoop(); MicroProfileOnThreadExit(); @@ -61,99 +60,68 @@ void CoreTiming::Initialize(std::function&& on_thread_init_) { const auto empty_timed_callback = [](std::uintptr_t, std::chrono::nanoseconds) {}; ev_lost = CreateEvent("_lost_event", empty_timed_callback); if (is_multicore) { - const auto hardware_concurrency = std::thread::hardware_concurrency(); - worker_threads.emplace_back(ThreadEntry, std::ref(*this)); - if (hardware_concurrency >= 6) { - worker_threads.emplace_back(ThreadEntry, std::ref(*this)); - } - if (hardware_concurrency >= 10) { - worker_threads.emplace_back(ThreadEntry, std::ref(*this)); - } + timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); } } void CoreTiming::Shutdown() { - is_paused = true; + paused = true; shutting_down = true; - { - std::unique_lock main_lock(event_mutex); - event_cv.notify_all(); - wait_pause_cv.notify_all(); + pause_event.Set(); + event.Set(); + if (timer_thread) { + timer_thread->join(); } - for (auto& thread : worker_threads) { - thread.join(); - } - worker_threads.clear(); ClearPendingEvents(); + timer_thread.reset(); has_started = 
false; } -void CoreTiming::Pause(bool is_paused_) { - std::unique_lock main_lock(event_mutex); - if (is_paused_ == paused_state.load(std::memory_order_relaxed)) { - return; - } - if (is_multicore) { - is_paused = is_paused_; - event_cv.notify_all(); - if (!is_paused_) { - wait_pause_cv.notify_all(); - } - } - paused_state.store(is_paused_, std::memory_order_relaxed); +void CoreTiming::Pause(bool is_paused) { + paused = is_paused; + pause_event.Set(); } -void CoreTiming::SyncPause(bool is_paused_) { - std::unique_lock main_lock(event_mutex); - if (is_paused_ == paused_state.load(std::memory_order_relaxed)) { +void CoreTiming::SyncPause(bool is_paused) { + if (is_paused == paused && paused_set == paused) { return; } - - if (is_multicore) { - is_paused = is_paused_; - event_cv.notify_all(); - if (!is_paused_) { - wait_pause_cv.notify_all(); - } - } - paused_state.store(is_paused_, std::memory_order_relaxed); - if (is_multicore) { - if (is_paused_) { - wait_signal_cv.wait(main_lock, [this] { return pause_count == worker_threads.size(); }); - } else { - wait_signal_cv.wait(main_lock, [this] { return pause_count == 0; }); + Pause(is_paused); + if (timer_thread) { + if (!is_paused) { + pause_event.Set(); } + event.Set(); + while (paused_set != is_paused) + ; } } bool CoreTiming::IsRunning() const { - return !paused_state.load(std::memory_order_acquire); + return !paused_set; } bool CoreTiming::HasPendingEvents() const { - std::unique_lock main_lock(event_mutex); - return !event_queue.empty(); + return !(wait_set && event_queue.empty()); } void CoreTiming::ScheduleEvent(std::chrono::nanoseconds ns_into_future, const std::shared_ptr& event_type, std::uintptr_t user_data) { + { + std::scoped_lock scope{basic_lock}; + const u64 timeout = static_cast((GetGlobalTimeNs() + ns_into_future).count()); - std::unique_lock main_lock(event_mutex); - const u64 timeout = static_cast((GetGlobalTimeNs() + ns_into_future).count()); + event_queue.emplace_back(Event{timeout, event_fifo_id++, 
user_data, event_type}); - event_queue.emplace_back(Event{timeout, event_fifo_id++, user_data, event_type}); - - std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); - - if (is_multicore) { - event_cv.notify_one(); + std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); } + event.Set(); } void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, std::uintptr_t user_data) { - std::unique_lock main_lock(event_mutex); + std::scoped_lock scope{basic_lock}; const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get() && e.user_data == user_data; }); @@ -201,12 +169,11 @@ u64 CoreTiming::GetClockTicks() const { } void CoreTiming::ClearPendingEvents() { - std::unique_lock main_lock(event_mutex); event_queue.clear(); } void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { - std::unique_lock main_lock(event_mutex); + std::scoped_lock lock{basic_lock}; const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type.lock().get() == event_type.get(); @@ -220,22 +187,21 @@ void CoreTiming::RemoveEvent(const std::shared_ptr& event_type) { } std::optional CoreTiming::Advance() { + std::scoped_lock lock{advance_lock, basic_lock}; global_timer = GetGlobalTimeNs().count(); - std::unique_lock main_lock(event_mutex); while (!event_queue.empty() && event_queue.front().time <= global_timer) { Event evt = std::move(event_queue.front()); std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); event_queue.pop_back(); - event_mutex.unlock(); + basic_lock.unlock(); if (const auto event_type{evt.type.lock()}) { - std::unique_lock lk(event_type->guard); - event_type->callback(evt.user_data, std::chrono::nanoseconds{static_cast( - GetGlobalTimeNs().count() - evt.time)}); + event_type->callback( + evt.user_data, std::chrono::nanoseconds{static_cast(global_timer - evt.time)}); } - 
event_mutex.lock(); + basic_lock.lock(); global_timer = GetGlobalTimeNs().count(); } @@ -248,34 +214,26 @@ std::optional CoreTiming::Advance() { } void CoreTiming::ThreadLoop() { - const auto predicate = [this] { return !event_queue.empty() || is_paused; }; has_started = true; while (!shutting_down) { - while (!is_paused && !shutting_down) { + while (!paused) { + paused_set = false; const auto next_time = Advance(); if (next_time) { if (*next_time > 0) { std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time); - std::unique_lock main_lock(event_mutex); - event_cv.wait_for(main_lock, next_time_ns, predicate); + event.WaitFor(next_time_ns); } } else { - std::unique_lock main_lock(event_mutex); - event_cv.wait(main_lock, predicate); + wait_set = true; + event.Wait(); } + wait_set = false; } - std::unique_lock main_lock(event_mutex); - pause_count++; - if (pause_count == worker_threads.size()) { - clock->Pause(true); - wait_signal_cv.notify_all(); - } - wait_pause_cv.wait(main_lock, [this] { return !is_paused || shutting_down; }); - pause_count--; - if (pause_count == 0) { - clock->Pause(false); - wait_signal_cv.notify_all(); - } + paused_set = true; + clock->Pause(true); + pause_event.Wait(); + clock->Pause(false); } } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index cbc667c69..888828fd0 100755 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -6,16 +6,16 @@ #include #include -#include #include #include -#include #include #include #include #include #include "common/common_types.h" +#include "common/spin_lock.h" +#include "common/thread.h" #include "common/wall_clock.h" namespace Core::Timing { @@ -33,7 +33,6 @@ struct EventType { TimedCallback callback; /// A pointer to the name of the event. 
const std::string name; - mutable std::mutex guard; }; /** @@ -148,21 +147,19 @@ private: u64 event_fifo_id = 0; std::shared_ptr ev_lost; + Common::Event event{}; + Common::Event pause_event{}; + Common::SpinLock basic_lock{}; + Common::SpinLock advance_lock{}; + std::unique_ptr timer_thread; + std::atomic paused{}; + std::atomic paused_set{}; + std::atomic wait_set{}; + std::atomic shutting_down{}; std::atomic has_started{}; std::function on_thread_init{}; - std::vector worker_threads; - - std::condition_variable event_cv; - std::condition_variable wait_pause_cv; - std::condition_variable wait_signal_cv; - mutable std::mutex event_mutex; - - std::atomic paused_state{}; - bool is_paused{}; - bool shutting_down{}; bool is_multicore{}; - size_t pause_count{}; /// Cycle timing u64 ticks{}; diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index 837eebb92..e0c66fa2e 100755 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include "core/core.h" @@ -23,14 +22,13 @@ std::array delays{}; std::bitset callbacks_ran_flags; u64 expected_callback = 0; -std::mutex control_mutex; template void HostCallbackTemplate(std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { - std::unique_lock lk(control_mutex); static_assert(IDX < CB_IDS.size(), "IDX out of range"); callbacks_ran_flags.set(IDX); REQUIRE(CB_IDS[IDX] == user_data); + REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]); delays[IDX] = ns_late.count(); ++expected_callback; }