early-access version 2639

This commit is contained in:
pineappleEA 2022-04-03 03:41:15 +02:00
parent 0397c1ff98
commit d15d58f409
25 changed files with 272 additions and 203 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 2637.
This is the source code for early-access 2639.
## Legal Notice

View file

@ -68,9 +68,6 @@ if (YUZU_USE_EXTERNAL_SDL2)
add_library(SDL2 ALIAS SDL2-static)
endif()
# SoundTouch
add_subdirectory(soundtouch)
# Cubeb
if(ENABLE_CUBEB)
set(BUILD_TESTS OFF CACHE BOOL "")

View file

@ -36,8 +36,6 @@ add_library(audio_core STATIC
splitter_context.h
stream.cpp
stream.h
time_stretch.cpp
time_stretch.h
voice_context.cpp
voice_context.h
@ -63,7 +61,6 @@ if (NOT MSVC)
endif()
target_link_libraries(audio_core PUBLIC common core)
target_link_libraries(audio_core PRIVATE SoundTouch)
if(ENABLE_CUBEB)
target_link_libraries(audio_core PRIVATE cubeb)

View file

@ -7,7 +7,6 @@
#include <cstring>
#include "audio_core/cubeb_sink.h"
#include "audio_core/stream.h"
#include "audio_core/time_stretch.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/ring_buffer.h"
@ -23,8 +22,7 @@ class CubebSinkStream final : public SinkStream {
public:
CubebSinkStream(cubeb* ctx_, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
const std::string& name)
: ctx{ctx_}, num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate,
num_channels} {
: ctx{ctx_}, num_channels{std::min(num_channels_, 6u)} {
cubeb_stream_params params{};
params.rate = sample_rate;
@ -131,7 +129,6 @@ private:
Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame{};
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
void* output_buffer, long num_frames);
@ -205,25 +202,7 @@ long CubebSinkStream::DataCallback([[maybe_unused]] cubeb_stream* stream, void*
const std::size_t num_channels = impl->GetNumChannels();
const std::size_t samples_to_write = num_channels * num_frames;
std::size_t samples_written;
/*
if (Settings::values.enable_audio_stretching.GetValue()) {
const std::vector<s16> in{impl->queue.Pop()};
const std::size_t num_in{in.size() / num_channels};
s16* const out{reinterpret_cast<s16*>(buffer)};
const std::size_t out_frames =
impl->time_stretch.Process(in.data(), num_in, out, num_frames);
samples_written = out_frames * num_channels;
if (impl->should_flush) {
impl->time_stretch.Flush();
impl->should_flush = false;
}
} else {
samples_written = impl->queue.Pop(buffer, samples_to_write);
}*/
samples_written = impl->queue.Pop(buffer, samples_to_write);
const std::size_t samples_written = impl->queue.Pop(buffer, samples_to_write);
if (samples_written >= num_channels) {
std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),

View file

@ -7,7 +7,6 @@
#include <cstring>
#include "audio_core/sdl2_sink.h"
#include "audio_core/stream.h"
#include "audio_core/time_stretch.h"
#include "common/assert.h"
#include "common/logging/log.h"
//#include "common/settings.h"
@ -27,7 +26,7 @@ namespace AudioCore {
class SDLSinkStream final : public SinkStream {
public:
SDLSinkStream(u32 sample_rate, u32 num_channels_, const std::string& output_device)
: num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate, num_channels} {
: num_channels{std::min(num_channels_, 6u)} {
SDL_AudioSpec spec;
spec.freq = sample_rate;
@ -116,7 +115,6 @@ private:
SDL_AudioDeviceID dev = 0;
u32 num_channels{};
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
};
SDLSink::SDLSink(std::string_view target_device_name) {

View file

@ -46,6 +46,43 @@ namespace Common {
reinterpret_cast<__int64*>(expected.data())) != 0;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
u8& actual) {
actual =
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
u16& actual) {
actual =
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
u32& actual) {
actual =
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
u64& actual) {
actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value,
expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
u128& actual) {
const bool result =
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
value[0], reinterpret_cast<__int64*>(expected.data())) != 0;
actual = expected;
return result;
}
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
u128 result{};
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
@ -79,6 +116,42 @@ namespace Common {
return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
u8& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
u16& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
u32& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
u64& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
/// 128-bit atomic compare-and-swap (GCC/Clang `__sync` builtin variant).
/// The u128 operands are copied byte-wise into native `unsigned __int128` values so the builtin
/// can operate on them, and the observed value is copied back out the same way.
/// @param actual Receives the 128-bit value that was found at `pointer` before the operation.
/// @return true if the found value matched `expected` (i.e. the swap happened).
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
                                               u128& actual) {
    unsigned __int128 desired_raw;
    unsigned __int128 comparand_raw;
    std::memcpy(&desired_raw, value.data(), sizeof(u128));
    std::memcpy(&comparand_raw, expected.data(), sizeof(u128));

    const unsigned __int128 observed_raw = __sync_val_compare_and_swap(
        reinterpret_cast<volatile unsigned __int128*>(pointer), comparand_raw, desired_raw);

    std::memcpy(actual.data(), &observed_raw, sizeof(u128));
    return observed_raw == comparand_raw;
}
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
unsigned __int128 zeros_a = 0;
unsigned __int128 result_a =

View file

@ -38,6 +38,7 @@ enum class CPUAccuracy : u32 {
Auto = 0,
Accurate = 1,
Unsafe = 2,
Paranoid = 3,
};
enum class FullscreenMode : u32 {
@ -470,7 +471,7 @@ struct Values {
// Cpu
RangedSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
CPUAccuracy::Unsafe, "cpu_accuracy"};
CPUAccuracy::Paranoid, "cpu_accuracy"};
// TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021
BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"};
BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"};

View file

@ -55,8 +55,9 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
u64 NativeClock::GetRTSC() {
TimePoint new_time_point{};
TimePoint current_time_point{};
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
_mm_mfence();
const u64 current_measure = __rdtsc();
u64 diff = current_measure - current_time_point.inner.last_measure;
@ -66,7 +67,7 @@ u64 NativeClock::GetRTSC() {
: current_time_point.inner.last_measure;
new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack));
current_time_point.pack, current_time_point.pack));
/// The clock cannot be more precise than the guest timer, remove the lower bits
return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
}
@ -75,13 +76,14 @@ void NativeClock::Pause(bool is_paused) {
if (!is_paused) {
TimePoint current_time_point{};
TimePoint new_time_point{};
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
new_time_point.pack = current_time_point.pack;
_mm_mfence();
new_time_point.inner.last_measure = __rdtsc();
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack));
current_time_point.pack, current_time_point.pack));
}
}

View file

@ -186,35 +186,41 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_recompile_exclusives) {
config.recompile_on_exclusive_fastmem_failure = false;
}
}
} else {
// Unsafe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
config.unsafe_optimizations = true;
if (Settings::values.cpuopt_unsafe_unfuse_fma) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
}
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
}
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
}
// Unsafe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
config.unsafe_optimizations = true;
if (Settings::values.cpuopt_unsafe_unfuse_fma) {
// Curated optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
config.unsafe_optimizations = true;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
}
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
}
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
}
// Curated optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
config.unsafe_optimizations = true;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
// Paranoia mode for debugging optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Paranoid) {
config.unsafe_optimizations = false;
config.optimizations = Dynarmic::no_optimizations;
}
}
return std::make_unique<Dynarmic::A32::Jit>(config);

View file

@ -248,35 +248,41 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_recompile_exclusives) {
config.recompile_on_exclusive_fastmem_failure = false;
}
}
} else {
// Unsafe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
config.unsafe_optimizations = true;
if (Settings::values.cpuopt_unsafe_unfuse_fma) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
}
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
if (Settings::values.cpuopt_unsafe_fastmem_check) {
config.fastmem_address_space_bits = 64;
}
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
}
// Unsafe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
config.unsafe_optimizations = true;
if (Settings::values.cpuopt_unsafe_unfuse_fma) {
// Curated optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
config.unsafe_optimizations = true;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
}
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
if (Settings::values.cpuopt_unsafe_fastmem_check) {
config.fastmem_address_space_bits = 64;
}
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
}
// Curated optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
config.unsafe_optimizations = true;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
config.fastmem_address_space_bits = 64;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
// Paranoia mode for debugging optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Paranoid) {
config.unsafe_optimizations = false;
config.optimizations = Dynarmic::no_optimizations;
}
}
return std::make_shared<Dynarmic::A64::Jit>(config);

View file

@ -18,8 +18,7 @@ BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_)
BufferQueueConsumer::~BufferQueueConsumer() = default;
Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
std::chrono::nanoseconds expected_present,
u64 max_frame_number) {
std::chrono::nanoseconds expected_present) {
std::scoped_lock lock(core->mutex);
// Check that the consumer doesn't currently have the maximum number of buffers acquired.
@ -50,12 +49,6 @@ Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
const auto& buffer_item{core->queue[1]};
// If dropping entry[0] would leave us with a buffer that the consumer is not yet ready
// for, don't drop it.
if (max_frame_number && buffer_item.frame_number > max_frame_number) {
break;
}
// If entry[1] is timely, drop entry[0] (and repeat).
const auto desired_present = buffer_item.timestamp;
if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
@ -200,4 +193,39 @@ Status BufferQueueConsumer::Connect(std::shared_ptr<IConsumerListener> consumer_
return Status::NoError;
}
/// Builds a bitmask with one bit per buffer slot and writes it to `*out_slot_mask`.
/// A bit is set when the slot has not had acquire called on it and no queued item that has had
/// acquire called refers to that slot.
/// @return Status::BadValue if `out_slot_mask` is null, Status::NoInit if the queue has been
///         abandoned, Status::NoError otherwise.
Status BufferQueueConsumer::GetReleasedBuffers(u64* out_slot_mask) {
    if (!out_slot_mask) {
        LOG_ERROR(Service_NVFlinger, "out_slot_mask may not be nullptr");
        return Status::BadValue;
    }

    std::scoped_lock lock(core->mutex);

    if (core->is_abandoned) {
        LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
        return Status::NoInit;
    }

    // Start with every slot on which acquire has not been called.
    u64 released_mask = 0;
    for (int slot_index = 0; slot_index < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot_index) {
        if (slots[slot_index].acquire_called) {
            continue;
        }
        released_mask |= (1ULL << slot_index);
    }

    // Queued buffers that have already been acquired are dropped from the mask: the consumer will
    // not receive their buffer addresses again, so it must keep its cached information for them.
    for (const auto& queued_item : core->queue) {
        if (queued_item.acquire_called) {
            released_mask &= ~(1ULL << queued_item.slot);
        }
    }

    LOG_DEBUG(Service_NVFlinger, "returning mask {}", released_mask);
    *out_slot_mask = released_mask;
    return Status::NoError;
}
} // namespace Service::android

View file

@ -24,10 +24,10 @@ public:
explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_);
~BufferQueueConsumer();
Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present,
u64 max_frame_number = 0);
Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present);
Status ReleaseBuffer(s32 slot, u64 frame_number, const Fence& release_fence);
Status Connect(std::shared_ptr<IConsumerListener> consumer_listener, bool controlled_by_app);
Status GetReleasedBuffers(u64* out_slot_mask);
private:
std::shared_ptr<BufferQueueCore> core;

View file

@ -95,7 +95,6 @@ void BufferQueueCore::FreeBufferLocked(s32 slot) {
}
void BufferQueueCore::FreeAllBuffersLocked() {
queue.clear();
buffer_has_been_queued = false;
for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) {

View file

@ -73,8 +73,6 @@ private:
u32 transform_hint{};
bool is_allocating{};
mutable std::condition_variable_any is_allocating_condition;
bool allow_allocation{true};
u64 buffer_age{};
bool is_shutting_down{};
};

View file

@ -62,11 +62,12 @@ Status BufferQueueProducer::RequestBuffer(s32 slot, std::shared_ptr<GraphicBuffe
Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
LOG_DEBUG(Service_NVFlinger, "count = {}", buffer_count);
std::shared_ptr<IConsumerListener> listener;
std::shared_ptr<IConsumerListener> listener;
{
std::scoped_lock lock(core->mutex);
core->WaitWhileAllocatingLocked();
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit;
@ -120,7 +121,7 @@ Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
}
Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
Status* returnFlags) const {
Status* return_flags) const {
bool try_again = true;
while (try_again) {
@ -142,10 +143,12 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
ASSERT(slots[s].buffer_state == BufferState::Free);
if (slots[s].graphic_buffer != nullptr) {
core->FreeBufferLocked(s);
*returnFlags |= Status::ReleaseAllBuffers;
*return_flags |= Status::ReleaseAllBuffers;
}
}
// Look for a free buffer to give to the client
*found = BufferQueueCore::INVALID_BUFFER_SLOT;
s32 dequeued_count{};
s32 acquired_count{};
for (s32 s{}; s < max_buffer_count; ++s) {
@ -235,68 +238,50 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
{
std::scoped_lock lock(core->mutex);
core->WaitWhileAllocatingLocked();
if (format == PixelFormat::NoFormat) {
format = core->default_buffer_format;
}
// Enable the usage bits the consumer requested
usage |= core->consumer_usage_bit;
s32 found{};
Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags);
if (status != Status::NoError) {
return status;
}
// This should not happen
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
LOG_ERROR(Service_NVFlinger, "no available buffer slots");
return Status::Busy;
}
*out_slot = found;
attached_by_consumer = slots[found].attached_by_consumer;
const bool use_default_size = !width && !height;
if (use_default_size) {
width = core->default_width;
height = core->default_height;
}
s32 found = BufferItem::INVALID_BUFFER_SLOT;
while (found == BufferItem::INVALID_BUFFER_SLOT) {
Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags);
if (status != Status::NoError) {
return status;
}
// This should not happen
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
LOG_DEBUG(Service_NVFlinger, "no available buffer slots");
return Status::Busy;
}
const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
// If we are not allowed to allocate new buffers, WaitForFreeSlotThenRelock must have
// returned a slot containing a buffer. If this buffer would require reallocation to
// meet the requested attributes, we free it and attempt to get another one.
if (!core->allow_allocation) {
if (buffer->NeedsReallocation(width, height, format, usage)) {
core->FreeBufferLocked(found);
found = BufferItem::INVALID_BUFFER_SLOT;
continue;
}
}
}
*out_slot = found;
attached_by_consumer = slots[found].attached_by_consumer;
slots[found].buffer_state = BufferState::Dequeued;
const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
if ((buffer == nullptr) || buffer->NeedsReallocation(width, height, format, usage)) {
if ((buffer == nullptr) || (buffer->Width() != width) || (buffer->Height() != height) ||
(buffer->Format() != format) || ((buffer->Usage() & usage) != usage)) {
slots[found].acquire_called = false;
slots[found].graphic_buffer = nullptr;
slots[found].request_buffer_called = false;
slots[found].fence = Fence::NoFence();
core->buffer_age = 0;
return_flags |= Status::BufferNeedsReallocation;
} else {
// We add 1 because that will be the frame number when this buffer
// is queued
core->buffer_age = core->frame_counter + 1 - slots[found].frame_number;
}
LOG_DEBUG(Service_NVFlinger, "setting buffer age to {}", core->buffer_age);
*out_fence = slots[found].fence;
slots[found].fence = Fence::NoFence();
}
@ -311,6 +296,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
{
std::scoped_lock lock(core->mutex);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit;
@ -327,6 +313,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
LOG_DEBUG(Service_NVFlinger, "returning slot={} frame={}, flags={}", *out_slot,
slots[*out_slot].frame_number, return_flags);
return return_flags;
}
@ -334,6 +321,7 @@ Status BufferQueueProducer::DetachBuffer(s32 slot) {
LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
std::scoped_lock lock(core->mutex);
if (core->is_abandoned) {
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
return Status::NoInit;
@ -369,7 +357,6 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr<GraphicBuffer>* out
}
std::scoped_lock lock(core->mutex);
core->WaitWhileAllocatingLocked();
if (core->is_abandoned) {
@ -423,6 +410,7 @@ Status BufferQueueProducer::AttachBuffer(s32* out_slot,
return status;
}
// This should not happen
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
LOG_ERROR(Service_NVFlinger, "No available buffer slots");
return Status::Busy;
@ -466,8 +454,8 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
return Status::BadValue;
}
std::shared_ptr<IConsumerListener> frameAvailableListener;
std::shared_ptr<IConsumerListener> frameReplacedListener;
std::shared_ptr<IConsumerListener> frame_available_listener;
std::shared_ptr<IConsumerListener> frame_replaced_listener;
s32 callback_ticket{};
BufferItem item;
@ -541,12 +529,13 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
item.fence = fence;
item.is_droppable = core->dequeue_buffer_cannot_block || async;
item.swap_interval = swap_interval;
sticky_transform = sticky_transform_;
if (core->queue.empty()) {
// When the queue is empty, we can simply queue this buffer
core->queue.push_back(item);
frameAvailableListener = core->consumer_listener;
frame_available_listener = core->consumer_listener;
} else {
// When the queue is not empty, we need to look at the front buffer
// state to see if we need to replace it
@ -563,10 +552,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
}
// Overwrite the droppable buffer with the incoming one
*front = item;
frameReplacedListener = core->consumer_listener;
frame_replaced_listener = core->consumer_listener;
} else {
core->queue.push_back(item);
frameAvailableListener = core->consumer_listener;
frame_available_listener = core->consumer_listener;
}
}
@ -592,10 +581,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
callback_condition.wait(callback_mutex);
}
if (frameAvailableListener != nullptr) {
frameAvailableListener->OnFrameAvailable(item);
} else if (frameReplacedListener != nullptr) {
frameReplacedListener->OnFrameReplaced(item);
if (frame_available_listener != nullptr) {
frame_available_listener->OnFrameAvailable(item);
} else if (frame_replaced_listener != nullptr) {
frame_replaced_listener->OnFrameReplaced(item);
}
++current_callback_ticket;
@ -669,13 +658,6 @@ Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) {
case NativeWindow::ConsumerUsageBits:
value = core->consumer_usage_bit;
break;
case NativeWindow::BufferAge:
if (core->buffer_age > INT32_MAX) {
value = 0;
} else {
value = static_cast<u32>(core->buffer_age);
}
break;
default:
UNREACHABLE();
return Status::BadValue;
@ -737,7 +719,6 @@ Status BufferQueueProducer::Connect(const std::shared_ptr<IProducerListener>& li
core->buffer_has_been_queued = false;
core->dequeue_buffer_cannot_block =
core->consumer_controlled_by_app && producer_controlled_by_app;
core->allow_allocation = true;
return status;
}
@ -770,7 +751,7 @@ Status BufferQueueProducer::Disconnect(NativeWindowApi api) {
core->SignalDequeueCondition();
buffer_wait_event->GetWritableEvent().Signal();
listener = core->consumer_listener;
} else if (core->connected_api != NativeWindowApi::NoConnectedApi) {
} else {
LOG_ERROR(Service_NVFlinger, "still connected to another api (cur = {} req = {})",
core->connected_api, api);
status = Status::BadValue;

View file

@ -66,7 +66,7 @@ public:
private:
BufferQueueProducer(const BufferQueueProducer&) = delete;
Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* returnFlags) const;
Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* return_flags) const;
Kernel::KEvent* buffer_wait_event{};
Service::KernelHelpers::ServiceContext& service_context;

View file

@ -36,38 +36,41 @@ void ConsumerBase::FreeBufferLocked(s32 slot_index) {
}
/// Frame-available callback. As written it only takes the consumer mutex and emits a debug log
/// entry; `item` is not inspected.
void ConsumerBase::OnFrameAvailable(const BufferItem& item) {
std::scoped_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called");
}
/// Frame-replaced callback. As written it only takes the consumer mutex and emits a debug log
/// entry; `item` is not inspected.
void ConsumerBase::OnFrameReplaced(const BufferItem& item) {
std::scoped_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called");
}
void ConsumerBase::OnBuffersReleased() {
std::scoped_lock lock(mutex);
LOG_DEBUG(Service_NVFlinger, "called");
if (is_abandoned) {
// Nothing to do if we're already abandoned.
return;
}
u64 mask = 0;
consumer->GetReleasedBuffers(&mask);
for (int i = 0; i < BufferQueueDefs::NUM_BUFFER_SLOTS; i++) {
if (mask & (1ULL << i)) {
FreeBufferLocked(i);
}
}
}
/// No-op: this implementation does nothing when notified of a sideband stream change.
void ConsumerBase::OnSidebandStreamChanged() {}
Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when,
u64 max_frame_number) {
if (is_abandoned) {
LOG_ERROR(Service_NVFlinger, "consumer is abandoned!");
return Status::NoInit;
}
Status err = consumer->AcquireBuffer(item, present_when, max_frame_number);
Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when) {
Status err = consumer->AcquireBuffer(item, present_when);
if (err != Status::NoError) {
return err;
}
if (item->graphic_buffer != nullptr) {
if (slots[item->slot].graphic_buffer != nullptr) {
FreeBufferLocked(item->slot);
}
slots[item->slot].graphic_buffer = item->graphic_buffer;
}

View file

@ -35,8 +35,7 @@ protected:
virtual void OnSidebandStreamChanged() override;
void FreeBufferLocked(s32 slot_index);
Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when,
u64 max_frame_number = 0);
Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when);
Status ReleaseBufferLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer);
bool StillTracking(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer) const;
Status AddReleaseFenceLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer,

View file

@ -104,7 +104,7 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
const auto lock_guard = Lock();
LOG_DEBUG(Service, "Opening \"{}\" display", name);
LOG_DEBUG(Service_NVFlinger, "Opening \"{}\" display", name);
const auto itr =
std::find_if(displays.begin(), displays.end(),
@ -219,7 +219,7 @@ VI::Layer* NVFlinger::FindOrCreateLayer(u64 display_id, u64 layer_id) {
auto* layer = display->FindLayer(layer_id);
if (layer == nullptr) {
LOG_DEBUG(Service, "Layer at id {} not found. Trying to create it.", layer_id);
LOG_DEBUG(Service_NVFlinger, "Layer at id {} not found. Trying to create it.", layer_id);
CreateLayerAtId(*display, layer_id);
return display->FindLayer(layer_id);
}

View file

@ -39,21 +39,9 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std
return gpu_addr;
}
/// Notifies the rasterizer that every sub-mapped range inside [gpu_addr, gpu_addr + size) is being
/// unmapped. Ranges whose GPU address has no CPU translation are silently skipped.
void MemoryManager::UnmapSubmappedRanges(GPUVAddr gpu_addr, std::size_t size) {
    const auto ranges = GetSubmappedRange(gpu_addr, size);

    for (const auto& [range_addr, range_size] : ranges) {
        // Flush and invalidate through the GPU interface, to be asynchronous if possible.
        const std::optional<VAddr> host_addr = GpuToCpuAddress(range_addr);
        if (host_addr.has_value()) {
            rasterizer->UnmapMemory(host_addr.value(), range_size);
        }
    }
}
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
// Unmap any pre-existing rasterizer memory in this range
UnmapSubmappedRanges(gpu_addr, size);
// Mark any pre-existing rasterizer memory in this range as remapped
rasterizer->ModifyGPUMemory(gpu_addr, size);
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
if (it != map_ranges.end() && it->first == gpu_addr) {
@ -85,8 +73,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
} else {
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
}
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
for (const auto& [map_addr, map_size] : submapped_ranges) {
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
ASSERT(cpu_addr);
rasterizer->UnmapMemory(*cpu_addr, map_size);
}
UnmapSubmappedRanges(gpu_addr, size);
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}

View file

@ -149,7 +149,6 @@ private:
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
void UnmapSubmappedRanges(GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
bool start_32bit_address = false) const;

View file

@ -454,15 +454,20 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
});
for (const ImageId image_id : images) {
Image& image = slot_images[image_id];
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
DownloadImage(image_id);
}
}
/// Downloads the contents of a single host image back to guest memory.
/// @param image_id Index into slot_images identifying the image to download.
template <class P>
void TextureCache<P>::DownloadImage(ImageId image_id) {
Image& image = slot_images[image_id];
// Staging buffer sized to hold the image's unswizzled data.
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
// NOTE(review): presumably waits for the download to complete before mapped_span is read — confirm.
runtime.Finish();
// Write the downloaded data into GPU memory at the image's address, using the same copy list.
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
template <class P>
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> deleted_images;
@ -1058,7 +1063,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId overlap_id : ignore_textures) {
Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
UNIMPLEMENTED();
DownloadImage(overlap_id);
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);

View file

@ -139,6 +139,9 @@ public:
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory
void DownloadImage(ImageId image_id);
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);

View file

@ -52,6 +52,11 @@
<string>Unsafe</string>
</property>
</item>
<item>
<property name="text">
<string>Paranoid (disables most optimizations)</string>
</property>
</item>
</widget>
</item>
</layout>

View file

@ -342,12 +342,6 @@ fps_cap =
# null: No audio output
output_engine =
# Whether or not to enable the audio-stretching post-processing effect.
# This effect adjusts audio speed to match emulation speed and helps prevent audio stutter,
# at the cost of increasing audio latency.
# 0: No, 1 (default): Yes
enable_audio_stretching =
# Which audio device to use.
# auto (default): Auto-select
output_device =