early-access version 2805

This commit is contained in:
pineappleEA 2022-06-28 04:53:32 +02:00
parent 5cf5fef861
commit 44079208eb
11 changed files with 143 additions and 102 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 2804. This is the source code for early-access 2805.
## Legal Notice ## Legal Notice

View file

@ -47,6 +47,9 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
case ThreadPriority::VeryHigh: case ThreadPriority::VeryHigh:
windows_priority = THREAD_PRIORITY_HIGHEST; windows_priority = THREAD_PRIORITY_HIGHEST;
break; break;
case ThreadPriority::Critical:
windows_priority = THREAD_PRIORITY_TIME_CRITICAL;
break;
default: default:
windows_priority = THREAD_PRIORITY_NORMAL; windows_priority = THREAD_PRIORITY_NORMAL;
break; break;
@ -59,9 +62,10 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
void SetCurrentThreadPriority(ThreadPriority new_priority) { void SetCurrentThreadPriority(ThreadPriority new_priority) {
pthread_t this_thread = pthread_self(); pthread_t this_thread = pthread_self();
s32 max_prio = sched_get_priority_max(SCHED_OTHER); const auto scheduling_type = SCHED_OTHER;
s32 min_prio = sched_get_priority_min(SCHED_OTHER); s32 max_prio = sched_get_priority_max(scheduling_type);
u32 level = static_cast<u32>(new_priority) + 1; s32 min_prio = sched_get_priority_min(scheduling_type);
u32 level = std::max(static_cast<u32>(new_priority) + 1, 4U);
struct sched_param params; struct sched_param params;
if (max_prio > min_prio) { if (max_prio > min_prio) {
@ -70,7 +74,7 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4; params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4;
} }
pthread_setschedparam(this_thread, SCHED_OTHER, &params); pthread_setschedparam(this_thread, scheduling_type, &params);
} }
#endif #endif

View file

@ -92,6 +92,7 @@ enum class ThreadPriority : u32 {
Normal = 1, Normal = 1,
High = 2, High = 2,
VeryHigh = 3, VeryHigh = 3,
Critical = 4,
}; };
void SetCurrentThreadPriority(ThreadPriority new_priority); void SetCurrentThreadPriority(ThreadPriority new_priority);

View file

@ -30,6 +30,10 @@ namespace Common {
#else #else
return _udiv128(r[1], r[0], d, &remainder); return _udiv128(r[1], r[0], d, &remainder);
#endif #endif
#else
#ifdef __SIZEOF_INT128__
const auto product = static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b);
return static_cast<u64>(product / d);
#else #else
const u64 diva = a / d; const u64 diva = a / d;
const u64 moda = a % d; const u64 moda = a % d;
@ -37,6 +41,7 @@ namespace Common {
const u64 modb = b % d; const u64 modb = b % d;
return diva * b + moda * divb + moda * modb / d; return diva * b + moda * divb + moda * modb / d;
#endif #endif
#endif
} }
// This function multiplies 2 u64 values and produces a u128 value; // This function multiplies 2 u64 values and produces a u128 value;

View file

@ -65,8 +65,10 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
u64 rtsc_frequency_) u64 rtsc_frequency_)
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
rtsc_frequency_} { rtsc_frequency_} {
time_point.inner.last_measure = FencedRDTSC(); TimePoint new_time_point{};
time_point.inner.accumulated_ticks = 0U; new_time_point.last_measure = FencedRDTSC();
new_time_point.accumulated_ticks = 0U;
time_point.store(new_time_point);
ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency);
@ -76,34 +78,31 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
u64 NativeClock::GetRTSC() { u64 NativeClock::GetRTSC() {
TimePoint new_time_point{}; TimePoint new_time_point{};
TimePoint current_time_point{}; TimePoint current_time_point = time_point.load(std::memory_order_acquire);
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do { do {
const u64 current_measure = FencedRDTSC(); const u64 current_measure = FencedRDTSC();
u64 diff = current_measure - current_time_point.inner.last_measure; u64 diff = current_measure - current_time_point.last_measure;
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure new_time_point.last_measure = current_measure > current_time_point.last_measure
? current_measure ? current_measure
: current_time_point.inner.last_measure; : current_time_point.last_measure;
new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; new_time_point.accumulated_ticks = current_time_point.accumulated_ticks + diff;
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, } while (!time_point.compare_exchange_weak(
current_time_point.pack, current_time_point.pack)); current_time_point, new_time_point, std::memory_order_release, std::memory_order_acquire));
/// The clock cannot be more precise than the guest timer, remove the lower bits /// The clock cannot be more precise than the guest timer, remove the lower bits
return new_time_point.inner.accumulated_ticks & inaccuracy_mask; return new_time_point.accumulated_ticks;
} }
void NativeClock::Pause(bool is_paused) { void NativeClock::Pause(bool is_paused) {
if (!is_paused) { if (!is_paused) {
TimePoint current_time_point{};
TimePoint new_time_point{}; TimePoint new_time_point{};
TimePoint current_time_point = time_point.load(std::memory_order_acquire);
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do { do {
new_time_point.pack = current_time_point.pack; new_time_point = current_time_point;
new_time_point.inner.last_measure = FencedRDTSC(); new_time_point.last_measure = FencedRDTSC();
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, } while (!time_point.compare_exchange_weak(current_time_point, new_time_point,
current_time_point.pack, current_time_point.pack)); std::memory_order_release,
std::memory_order_acquire));
} }
} }

View file

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <atomic>
#include "common/wall_clock.h" #include "common/wall_clock.h"
namespace Common { namespace Common {
@ -28,21 +29,12 @@ public:
private: private:
u64 GetRTSC(); u64 GetRTSC();
union alignas(16) TimePoint { struct alignas(16) TimePoint {
TimePoint() : pack{} {} u64 last_measure{};
u128 pack{}; u64 accumulated_ticks{};
struct Inner {
u64 last_measure{};
u64 accumulated_ticks{};
} inner;
}; };
/// value used to reduce the native clocks accuracy as some apss rely on std::atomic<TimePoint> time_point;
/// undefined behavior where the level of accuracy in the clock shouldn't
/// be higher.
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
TimePoint time_point;
// factors // factors
u64 clock_rtsc_factor{}; u64 clock_rtsc_factor{};
u64 cpu_rtsc_factor{}; u64 cpu_rtsc_factor{};

View file

@ -7,6 +7,7 @@
#include <tuple> #include <tuple>
#include "common/microprofile.h" #include "common/microprofile.h"
#include "common/thread.h"
#include "core/core_timing.h" #include "core/core_timing.h"
#include "core/core_timing_util.h" #include "core/core_timing_util.h"
#include "core/hardware_properties.h" #include "core/hardware_properties.h"
@ -45,7 +46,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) {
constexpr char name[] = "yuzu:HostTiming"; constexpr char name[] = "yuzu:HostTiming";
MicroProfileOnThreadCreate(name); MicroProfileOnThreadCreate(name);
Common::SetCurrentThreadName(name); Common::SetCurrentThreadName(name);
Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
instance.on_thread_init(); instance.on_thread_init();
instance.ThreadLoop(); instance.ThreadLoop();
MicroProfileOnThreadExit(); MicroProfileOnThreadExit();
@ -59,68 +60,96 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
const auto empty_timed_callback = [](std::uintptr_t, std::chrono::nanoseconds) {}; const auto empty_timed_callback = [](std::uintptr_t, std::chrono::nanoseconds) {};
ev_lost = CreateEvent("_lost_event", empty_timed_callback); ev_lost = CreateEvent("_lost_event", empty_timed_callback);
if (is_multicore) { if (is_multicore) {
timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this)); const auto hardware_concurrency = std::thread::hardware_concurrency();
worker_threads.emplace_back(ThreadEntry, std::ref(*this));
if (hardware_concurrency > 8) {
worker_threads.emplace_back(ThreadEntry, std::ref(*this));
}
} }
} }
void CoreTiming::Shutdown() { void CoreTiming::Shutdown() {
paused = true; is_paused = true;
shutting_down = true; shutting_down = true;
pause_event.Set(); {
event.Set(); std::unique_lock main_lock(event_mutex);
if (timer_thread) { event_cv.notify_all();
timer_thread->join(); wait_pause_cv.notify_all();
} }
for (auto& thread : worker_threads) {
thread.join();
}
worker_threads.clear();
ClearPendingEvents(); ClearPendingEvents();
timer_thread.reset();
has_started = false; has_started = false;
} }
void CoreTiming::Pause(bool is_paused) { void CoreTiming::Pause(bool is_paused_) {
paused = is_paused; std::unique_lock main_lock(event_mutex);
pause_event.Set(); if (is_paused_ == paused_state.load(std::memory_order_relaxed)) {
}
void CoreTiming::SyncPause(bool is_paused) {
if (is_paused == paused && paused_set == paused) {
return; return;
} }
Pause(is_paused); if (is_multicore) {
if (timer_thread) { is_paused = is_paused_;
if (!is_paused) { event_cv.notify_all();
pause_event.Set(); if (!is_paused_) {
wait_pause_cv.notify_all();
}
}
paused_state.store(is_paused_, std::memory_order_relaxed);
}
void CoreTiming::SyncPause(bool is_paused_) {
std::unique_lock main_lock(event_mutex);
if (is_paused_ == paused_state.load(std::memory_order_relaxed)) {
return;
}
if (is_multicore) {
is_paused = is_paused_;
event_cv.notify_all();
if (!is_paused_) {
wait_pause_cv.notify_all();
}
}
paused_state.store(is_paused_, std::memory_order_relaxed);
if (is_multicore) {
if (is_paused_) {
wait_signal_cv.wait(main_lock, [this] { return pause_count == worker_threads.size(); });
} else {
wait_signal_cv.wait(main_lock, [this] { return pause_count == 0; });
} }
event.Set();
while (paused_set != is_paused)
;
} }
} }
bool CoreTiming::IsRunning() const { bool CoreTiming::IsRunning() const {
return !paused_set; return !paused_state.load(std::memory_order_acquire);
} }
bool CoreTiming::HasPendingEvents() const { bool CoreTiming::HasPendingEvents() const {
return !(wait_set && event_queue.empty()); std::unique_lock main_lock(event_mutex);
return !event_queue.empty();
} }
void CoreTiming::ScheduleEvent(std::chrono::nanoseconds ns_into_future, void CoreTiming::ScheduleEvent(std::chrono::nanoseconds ns_into_future,
const std::shared_ptr<EventType>& event_type, const std::shared_ptr<EventType>& event_type,
std::uintptr_t user_data) { std::uintptr_t user_data) {
{
std::scoped_lock scope{basic_lock};
const u64 timeout = static_cast<u64>((GetGlobalTimeNs() + ns_into_future).count());
event_queue.emplace_back(Event{timeout, event_fifo_id++, user_data, event_type}); std::unique_lock main_lock(event_mutex);
const u64 timeout = static_cast<u64>((GetGlobalTimeNs() + ns_into_future).count());
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); event_queue.emplace_back(Event{timeout, event_fifo_id++, user_data, event_type});
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
if (is_multicore) {
event_cv.notify_one();
} }
event.Set();
} }
void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,
std::uintptr_t user_data) { std::uintptr_t user_data) {
std::scoped_lock scope{basic_lock}; std::unique_lock main_lock(event_mutex);
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type.lock().get() == event_type.get() && e.user_data == user_data; return e.type.lock().get() == event_type.get() && e.user_data == user_data;
}); });
@ -168,11 +197,12 @@ u64 CoreTiming::GetClockTicks() const {
} }
void CoreTiming::ClearPendingEvents() { void CoreTiming::ClearPendingEvents() {
std::unique_lock main_lock(event_mutex);
event_queue.clear(); event_queue.clear();
} }
void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) { void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
std::scoped_lock lock{basic_lock}; std::unique_lock main_lock(event_mutex);
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type.lock().get() == event_type.get(); return e.type.lock().get() == event_type.get();
@ -186,21 +216,22 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
} }
std::optional<s64> CoreTiming::Advance() { std::optional<s64> CoreTiming::Advance() {
std::scoped_lock lock{advance_lock, basic_lock};
global_timer = GetGlobalTimeNs().count(); global_timer = GetGlobalTimeNs().count();
std::unique_lock main_lock(event_mutex);
while (!event_queue.empty() && event_queue.front().time <= global_timer) { while (!event_queue.empty() && event_queue.front().time <= global_timer) {
Event evt = std::move(event_queue.front()); Event evt = std::move(event_queue.front());
std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>()); std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
event_queue.pop_back(); event_queue.pop_back();
basic_lock.unlock(); event_mutex.unlock();
if (const auto event_type{evt.type.lock()}) { if (const auto event_type{evt.type.lock()}) {
event_type->callback( std::unique_lock lk(event_type->guard);
evt.user_data, std::chrono::nanoseconds{static_cast<s64>(global_timer - evt.time)}); event_type->callback(evt.user_data, std::chrono::nanoseconds{static_cast<s64>(
GetGlobalTimeNs().count() - evt.time)});
} }
basic_lock.lock(); event_mutex.lock();
global_timer = GetGlobalTimeNs().count(); global_timer = GetGlobalTimeNs().count();
} }
@ -213,26 +244,34 @@ std::optional<s64> CoreTiming::Advance() {
} }
void CoreTiming::ThreadLoop() { void CoreTiming::ThreadLoop() {
const auto predicate = [this] { return !event_queue.empty() || is_paused; };
has_started = true; has_started = true;
while (!shutting_down) { while (!shutting_down) {
while (!paused) { while (!is_paused && !shutting_down) {
paused_set = false;
const auto next_time = Advance(); const auto next_time = Advance();
if (next_time) { if (next_time) {
if (*next_time > 0) { if (*next_time > 0) {
std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time); std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
event.WaitFor(next_time_ns); std::unique_lock main_lock(event_mutex);
event_cv.wait_for(main_lock, next_time_ns, predicate);
} }
} else { } else {
wait_set = true; std::unique_lock main_lock(event_mutex);
event.Wait(); event_cv.wait(main_lock, predicate);
} }
wait_set = false;
} }
paused_set = true; std::unique_lock main_lock(event_mutex);
clock->Pause(true); pause_count++;
pause_event.Wait(); if (pause_count == worker_threads.size()) {
clock->Pause(false); clock->Pause(true);
wait_signal_cv.notify_all();
}
wait_pause_cv.wait(main_lock, [this] { return !is_paused || shutting_down; });
pause_count--;
if (pause_count == 0) {
clock->Pause(false);
wait_signal_cv.notify_all();
}
} }
} }

View file

@ -5,6 +5,7 @@
#include <atomic> #include <atomic>
#include <chrono> #include <chrono>
#include <condition_variable>
#include <functional> #include <functional>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
@ -14,7 +15,6 @@
#include <vector> #include <vector>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/thread.h"
#include "common/wall_clock.h" #include "common/wall_clock.h"
namespace Core::Timing { namespace Core::Timing {
@ -32,6 +32,7 @@ struct EventType {
TimedCallback callback; TimedCallback callback;
/// A pointer to the name of the event. /// A pointer to the name of the event.
const std::string name; const std::string name;
mutable std::mutex guard;
}; };
/** /**
@ -146,19 +147,21 @@ private:
u64 event_fifo_id = 0; u64 event_fifo_id = 0;
std::shared_ptr<EventType> ev_lost; std::shared_ptr<EventType> ev_lost;
Common::Event event{};
Common::Event pause_event{};
std::mutex basic_lock;
std::mutex advance_lock;
std::unique_ptr<std::thread> timer_thread;
std::atomic<bool> paused{};
std::atomic<bool> paused_set{};
std::atomic<bool> wait_set{};
std::atomic<bool> shutting_down{};
std::atomic<bool> has_started{}; std::atomic<bool> has_started{};
std::function<void()> on_thread_init{}; std::function<void()> on_thread_init{};
std::vector<std::thread> worker_threads;
std::condition_variable event_cv;
std::condition_variable wait_pause_cv;
std::condition_variable wait_signal_cv;
mutable std::mutex event_mutex;
std::atomic<bool> paused_state{};
bool is_paused{};
bool shutting_down{};
bool is_multicore{}; bool is_multicore{};
size_t pause_count{};
/// Cycle timing /// Cycle timing
u64 ticks{}; u64 ticks{};

View file

@ -41,22 +41,18 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress)
size = Common::AlignUp(size, PAGE_SIZE); size = Common::AlignUp(size, PAGE_SIZE);
aligned_size = Common::AlignUp(size, align); aligned_size = Common::AlignUp(size, align);
address = pAddress; address = pAddress;
// TODO: pin init
allocated = true; allocated = true;
return NvResult::Success; return NvResult::Success;
} }
NvResult NvMap::Handle::Duplicate(bool internal_session) { NvResult NvMap::Handle::Duplicate(bool internal_session) {
std::scoped_lock lock(mutex);
// Unallocated handles cannot be duplicated as duplication requires memory accounting (in HOS) // Unallocated handles cannot be duplicated as duplication requires memory accounting (in HOS)
if (!allocated) [[unlikely]] { if (!allocated) [[unlikely]] {
return NvResult::BadValue; return NvResult::BadValue;
} }
std::scoped_lock lock(mutex);
// If we internally use FromId the duplication tracking of handles won't work accurately due to // If we internally use FromId the duplication tracking of handles won't work accurately due to
// us not implementing per-process handle refs. // us not implementing per-process handle refs.
if (internal_session) { if (internal_session) {

View file

@ -270,12 +270,12 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
} }
} }
gpu.PushGPUEntries(bind_id, std::move(entries));
params.fence.id = channel_syncpoint; params.fence.id = channel_syncpoint;
u32 increment{(flags.fence_increment.Value() != 0 ? 2 : 0) + u32 increment{(flags.fence_increment.Value() != 0 ? 2 : 0) +
(flags.increment_value.Value() != 0 ? params.fence.value : 0)}; (flags.increment_value.Value() != 0 ? params.fence.value : 0)};
params.fence.value = syncpoint_manager.IncrementSyncpointMaxExt(channel_syncpoint, increment); params.fence.value = syncpoint_manager.IncrementSyncpointMaxExt(channel_syncpoint, increment);
gpu.PushGPUEntries(bind_id, std::move(entries));
if (flags.fence_increment.Value()) { if (flags.fence_increment.Value()) {
if (flags.suppress_wfi.Value()) { if (flags.suppress_wfi.Value()) {

View file

@ -8,6 +8,7 @@
#include <chrono> #include <chrono>
#include <cstdlib> #include <cstdlib>
#include <memory> #include <memory>
#include <mutex>
#include <string> #include <string>
#include "core/core.h" #include "core/core.h"
@ -21,13 +22,14 @@ std::array<s64, 5> delays{};
std::bitset<CB_IDS.size()> callbacks_ran_flags; std::bitset<CB_IDS.size()> callbacks_ran_flags;
u64 expected_callback = 0; u64 expected_callback = 0;
std::mutex control_mutex;
template <unsigned int IDX> template <unsigned int IDX>
void HostCallbackTemplate(std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { void HostCallbackTemplate(std::uintptr_t user_data, std::chrono::nanoseconds ns_late) {
std::unique_lock<std::mutex> lk(control_mutex);
static_assert(IDX < CB_IDS.size(), "IDX out of range"); static_assert(IDX < CB_IDS.size(), "IDX out of range");
callbacks_ran_flags.set(IDX); callbacks_ran_flags.set(IDX);
REQUIRE(CB_IDS[IDX] == user_data); REQUIRE(CB_IDS[IDX] == user_data);
REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
delays[IDX] = ns_late.count(); delays[IDX] = ns_late.count();
++expected_callback; ++expected_callback;
} }