early-access version 2639
This commit is contained in:
parent
0397c1ff98
commit
d15d58f409
25 changed files with 272 additions and 203 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 2637.
|
This is the source code for early-access 2639.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
3
externals/CMakeLists.txt
vendored
3
externals/CMakeLists.txt
vendored
|
@ -68,9 +68,6 @@ if (YUZU_USE_EXTERNAL_SDL2)
|
||||||
add_library(SDL2 ALIAS SDL2-static)
|
add_library(SDL2 ALIAS SDL2-static)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# SoundTouch
|
|
||||||
add_subdirectory(soundtouch)
|
|
||||||
|
|
||||||
# Cubeb
|
# Cubeb
|
||||||
if(ENABLE_CUBEB)
|
if(ENABLE_CUBEB)
|
||||||
set(BUILD_TESTS OFF CACHE BOOL "")
|
set(BUILD_TESTS OFF CACHE BOOL "")
|
||||||
|
|
|
@ -36,8 +36,6 @@ add_library(audio_core STATIC
|
||||||
splitter_context.h
|
splitter_context.h
|
||||||
stream.cpp
|
stream.cpp
|
||||||
stream.h
|
stream.h
|
||||||
time_stretch.cpp
|
|
||||||
time_stretch.h
|
|
||||||
voice_context.cpp
|
voice_context.cpp
|
||||||
voice_context.h
|
voice_context.h
|
||||||
|
|
||||||
|
@ -63,7 +61,6 @@ if (NOT MSVC)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
target_link_libraries(audio_core PUBLIC common core)
|
target_link_libraries(audio_core PUBLIC common core)
|
||||||
target_link_libraries(audio_core PRIVATE SoundTouch)
|
|
||||||
|
|
||||||
if(ENABLE_CUBEB)
|
if(ENABLE_CUBEB)
|
||||||
target_link_libraries(audio_core PRIVATE cubeb)
|
target_link_libraries(audio_core PRIVATE cubeb)
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include "audio_core/cubeb_sink.h"
|
#include "audio_core/cubeb_sink.h"
|
||||||
#include "audio_core/stream.h"
|
#include "audio_core/stream.h"
|
||||||
#include "audio_core/time_stretch.h"
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/ring_buffer.h"
|
#include "common/ring_buffer.h"
|
||||||
|
@ -23,8 +22,7 @@ class CubebSinkStream final : public SinkStream {
|
||||||
public:
|
public:
|
||||||
CubebSinkStream(cubeb* ctx_, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
|
CubebSinkStream(cubeb* ctx_, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
|
||||||
const std::string& name)
|
const std::string& name)
|
||||||
: ctx{ctx_}, num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate,
|
: ctx{ctx_}, num_channels{std::min(num_channels_, 6u)} {
|
||||||
num_channels} {
|
|
||||||
|
|
||||||
cubeb_stream_params params{};
|
cubeb_stream_params params{};
|
||||||
params.rate = sample_rate;
|
params.rate = sample_rate;
|
||||||
|
@ -131,7 +129,6 @@ private:
|
||||||
Common::RingBuffer<s16, 0x10000> queue;
|
Common::RingBuffer<s16, 0x10000> queue;
|
||||||
std::array<s16, 2> last_frame{};
|
std::array<s16, 2> last_frame{};
|
||||||
std::atomic<bool> should_flush{};
|
std::atomic<bool> should_flush{};
|
||||||
TimeStretcher time_stretch;
|
|
||||||
|
|
||||||
static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
|
static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
|
||||||
void* output_buffer, long num_frames);
|
void* output_buffer, long num_frames);
|
||||||
|
@ -205,25 +202,7 @@ long CubebSinkStream::DataCallback([[maybe_unused]] cubeb_stream* stream, void*
|
||||||
|
|
||||||
const std::size_t num_channels = impl->GetNumChannels();
|
const std::size_t num_channels = impl->GetNumChannels();
|
||||||
const std::size_t samples_to_write = num_channels * num_frames;
|
const std::size_t samples_to_write = num_channels * num_frames;
|
||||||
std::size_t samples_written;
|
const std::size_t samples_written = impl->queue.Pop(buffer, samples_to_write);
|
||||||
|
|
||||||
/*
|
|
||||||
if (Settings::values.enable_audio_stretching.GetValue()) {
|
|
||||||
const std::vector<s16> in{impl->queue.Pop()};
|
|
||||||
const std::size_t num_in{in.size() / num_channels};
|
|
||||||
s16* const out{reinterpret_cast<s16*>(buffer)};
|
|
||||||
const std::size_t out_frames =
|
|
||||||
impl->time_stretch.Process(in.data(), num_in, out, num_frames);
|
|
||||||
samples_written = out_frames * num_channels;
|
|
||||||
|
|
||||||
if (impl->should_flush) {
|
|
||||||
impl->time_stretch.Flush();
|
|
||||||
impl->should_flush = false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
samples_written = impl->queue.Pop(buffer, samples_to_write);
|
|
||||||
}*/
|
|
||||||
samples_written = impl->queue.Pop(buffer, samples_to_write);
|
|
||||||
|
|
||||||
if (samples_written >= num_channels) {
|
if (samples_written >= num_channels) {
|
||||||
std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
|
std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include "audio_core/sdl2_sink.h"
|
#include "audio_core/sdl2_sink.h"
|
||||||
#include "audio_core/stream.h"
|
#include "audio_core/stream.h"
|
||||||
#include "audio_core/time_stretch.h"
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
//#include "common/settings.h"
|
//#include "common/settings.h"
|
||||||
|
@ -27,7 +26,7 @@ namespace AudioCore {
|
||||||
class SDLSinkStream final : public SinkStream {
|
class SDLSinkStream final : public SinkStream {
|
||||||
public:
|
public:
|
||||||
SDLSinkStream(u32 sample_rate, u32 num_channels_, const std::string& output_device)
|
SDLSinkStream(u32 sample_rate, u32 num_channels_, const std::string& output_device)
|
||||||
: num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate, num_channels} {
|
: num_channels{std::min(num_channels_, 6u)} {
|
||||||
|
|
||||||
SDL_AudioSpec spec;
|
SDL_AudioSpec spec;
|
||||||
spec.freq = sample_rate;
|
spec.freq = sample_rate;
|
||||||
|
@ -116,7 +115,6 @@ private:
|
||||||
SDL_AudioDeviceID dev = 0;
|
SDL_AudioDeviceID dev = 0;
|
||||||
u32 num_channels{};
|
u32 num_channels{};
|
||||||
std::atomic<bool> should_flush{};
|
std::atomic<bool> should_flush{};
|
||||||
TimeStretcher time_stretch;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
SDLSink::SDLSink(std::string_view target_device_name) {
|
SDLSink::SDLSink(std::string_view target_device_name) {
|
||||||
|
|
|
@ -46,6 +46,43 @@ namespace Common {
|
||||||
reinterpret_cast<__int64*>(expected.data())) != 0;
|
reinterpret_cast<__int64*>(expected.data())) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
|
||||||
|
u8& actual) {
|
||||||
|
actual =
|
||||||
|
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
|
||||||
|
return actual == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
|
||||||
|
u16& actual) {
|
||||||
|
actual =
|
||||||
|
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
|
||||||
|
return actual == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
|
||||||
|
u32& actual) {
|
||||||
|
actual =
|
||||||
|
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
|
||||||
|
return actual == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
|
||||||
|
u64& actual) {
|
||||||
|
actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value,
|
||||||
|
expected);
|
||||||
|
return actual == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
|
||||||
|
u128& actual) {
|
||||||
|
const bool result =
|
||||||
|
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
|
||||||
|
value[0], reinterpret_cast<__int64*>(expected.data())) != 0;
|
||||||
|
actual = expected;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
|
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
|
||||||
u128 result{};
|
u128 result{};
|
||||||
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
|
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
|
||||||
|
@ -79,6 +116,42 @@ namespace Common {
|
||||||
return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
|
return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
|
||||||
|
u8& actual) {
|
||||||
|
actual = __sync_val_compare_and_swap(pointer, expected, value);
|
||||||
|
return actual == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
|
||||||
|
u16& actual) {
|
||||||
|
actual = __sync_val_compare_and_swap(pointer, expected, value);
|
||||||
|
return actual == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
|
||||||
|
u32& actual) {
|
||||||
|
actual = __sync_val_compare_and_swap(pointer, expected, value);
|
||||||
|
return actual == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
|
||||||
|
u64& actual) {
|
||||||
|
actual = __sync_val_compare_and_swap(pointer, expected, value);
|
||||||
|
return actual == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
|
||||||
|
u128& actual) {
|
||||||
|
unsigned __int128 value_a;
|
||||||
|
unsigned __int128 expected_a;
|
||||||
|
unsigned __int128 actual_a;
|
||||||
|
std::memcpy(&value_a, value.data(), sizeof(u128));
|
||||||
|
std::memcpy(&expected_a, expected.data(), sizeof(u128));
|
||||||
|
actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
|
||||||
|
std::memcpy(actual.data(), &actual_a, sizeof(u128));
|
||||||
|
return actual_a == expected_a;
|
||||||
|
}
|
||||||
|
|
||||||
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
|
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
|
||||||
unsigned __int128 zeros_a = 0;
|
unsigned __int128 zeros_a = 0;
|
||||||
unsigned __int128 result_a =
|
unsigned __int128 result_a =
|
||||||
|
|
|
@ -38,6 +38,7 @@ enum class CPUAccuracy : u32 {
|
||||||
Auto = 0,
|
Auto = 0,
|
||||||
Accurate = 1,
|
Accurate = 1,
|
||||||
Unsafe = 2,
|
Unsafe = 2,
|
||||||
|
Paranoid = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class FullscreenMode : u32 {
|
enum class FullscreenMode : u32 {
|
||||||
|
@ -470,7 +471,7 @@ struct Values {
|
||||||
|
|
||||||
// Cpu
|
// Cpu
|
||||||
RangedSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
|
RangedSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
|
||||||
CPUAccuracy::Unsafe, "cpu_accuracy"};
|
CPUAccuracy::Paranoid, "cpu_accuracy"};
|
||||||
// TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021
|
// TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021
|
||||||
BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"};
|
BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"};
|
||||||
BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"};
|
BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"};
|
||||||
|
|
|
@ -55,8 +55,9 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
|
||||||
u64 NativeClock::GetRTSC() {
|
u64 NativeClock::GetRTSC() {
|
||||||
TimePoint new_time_point{};
|
TimePoint new_time_point{};
|
||||||
TimePoint current_time_point{};
|
TimePoint current_time_point{};
|
||||||
|
|
||||||
|
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
||||||
do {
|
do {
|
||||||
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
|
||||||
_mm_mfence();
|
_mm_mfence();
|
||||||
const u64 current_measure = __rdtsc();
|
const u64 current_measure = __rdtsc();
|
||||||
u64 diff = current_measure - current_time_point.inner.last_measure;
|
u64 diff = current_measure - current_time_point.inner.last_measure;
|
||||||
|
@ -66,7 +67,7 @@ u64 NativeClock::GetRTSC() {
|
||||||
: current_time_point.inner.last_measure;
|
: current_time_point.inner.last_measure;
|
||||||
new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
|
new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
|
||||||
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
||||||
current_time_point.pack));
|
current_time_point.pack, current_time_point.pack));
|
||||||
/// The clock cannot be more precise than the guest timer, remove the lower bits
|
/// The clock cannot be more precise than the guest timer, remove the lower bits
|
||||||
return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
|
return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
|
||||||
}
|
}
|
||||||
|
@ -75,13 +76,14 @@ void NativeClock::Pause(bool is_paused) {
|
||||||
if (!is_paused) {
|
if (!is_paused) {
|
||||||
TimePoint current_time_point{};
|
TimePoint current_time_point{};
|
||||||
TimePoint new_time_point{};
|
TimePoint new_time_point{};
|
||||||
|
|
||||||
|
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
||||||
do {
|
do {
|
||||||
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
|
||||||
new_time_point.pack = current_time_point.pack;
|
new_time_point.pack = current_time_point.pack;
|
||||||
_mm_mfence();
|
_mm_mfence();
|
||||||
new_time_point.inner.last_measure = __rdtsc();
|
new_time_point.inner.last_measure = __rdtsc();
|
||||||
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
||||||
current_time_point.pack));
|
current_time_point.pack, current_time_point.pack));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -186,35 +186,41 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
||||||
if (!Settings::values.cpuopt_recompile_exclusives) {
|
if (!Settings::values.cpuopt_recompile_exclusives) {
|
||||||
config.recompile_on_exclusive_fastmem_failure = false;
|
config.recompile_on_exclusive_fastmem_failure = false;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
|
// Unsafe optimizations
|
||||||
|
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
|
||||||
|
config.unsafe_optimizations = true;
|
||||||
|
if (Settings::values.cpuopt_unsafe_unfuse_fma) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
||||||
|
}
|
||||||
|
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
|
||||||
|
}
|
||||||
|
if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
|
||||||
|
}
|
||||||
|
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
||||||
|
}
|
||||||
|
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Unsafe optimizations
|
// Curated optimizations
|
||||||
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
|
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
|
||||||
config.unsafe_optimizations = true;
|
config.unsafe_optimizations = true;
|
||||||
if (Settings::values.cpuopt_unsafe_unfuse_fma) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
||||||
}
|
|
||||||
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
|
|
||||||
}
|
|
||||||
if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
|
||||||
}
|
|
||||||
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
||||||
}
|
|
||||||
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Curated optimizations
|
// Paranoia mode for debugging optimizations
|
||||||
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
|
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Paranoid) {
|
||||||
config.unsafe_optimizations = true;
|
config.unsafe_optimizations = false;
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
config.optimizations = Dynarmic::no_optimizations;
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
|
}
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_unique<Dynarmic::A32::Jit>(config);
|
return std::make_unique<Dynarmic::A32::Jit>(config);
|
||||||
|
|
|
@ -248,35 +248,41 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
||||||
if (!Settings::values.cpuopt_recompile_exclusives) {
|
if (!Settings::values.cpuopt_recompile_exclusives) {
|
||||||
config.recompile_on_exclusive_fastmem_failure = false;
|
config.recompile_on_exclusive_fastmem_failure = false;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
|
// Unsafe optimizations
|
||||||
|
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
|
||||||
|
config.unsafe_optimizations = true;
|
||||||
|
if (Settings::values.cpuopt_unsafe_unfuse_fma) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
||||||
|
}
|
||||||
|
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
|
||||||
|
}
|
||||||
|
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
||||||
|
}
|
||||||
|
if (Settings::values.cpuopt_unsafe_fastmem_check) {
|
||||||
|
config.fastmem_address_space_bits = 64;
|
||||||
|
}
|
||||||
|
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
|
||||||
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Unsafe optimizations
|
// Curated optimizations
|
||||||
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
|
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
|
||||||
config.unsafe_optimizations = true;
|
config.unsafe_optimizations = true;
|
||||||
if (Settings::values.cpuopt_unsafe_unfuse_fma) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
||||||
}
|
|
||||||
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
|
|
||||||
}
|
|
||||||
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
||||||
}
|
|
||||||
if (Settings::values.cpuopt_unsafe_fastmem_check) {
|
|
||||||
config.fastmem_address_space_bits = 64;
|
config.fastmem_address_space_bits = 64;
|
||||||
}
|
|
||||||
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Curated optimizations
|
// Paranoia mode for debugging optimizations
|
||||||
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
|
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Paranoid) {
|
||||||
config.unsafe_optimizations = true;
|
config.unsafe_optimizations = false;
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
config.optimizations = Dynarmic::no_optimizations;
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
}
|
||||||
config.fastmem_address_space_bits = 64;
|
|
||||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_shared<Dynarmic::A64::Jit>(config);
|
return std::make_shared<Dynarmic::A64::Jit>(config);
|
||||||
|
|
|
@ -18,8 +18,7 @@ BufferQueueConsumer::BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_)
|
||||||
BufferQueueConsumer::~BufferQueueConsumer() = default;
|
BufferQueueConsumer::~BufferQueueConsumer() = default;
|
||||||
|
|
||||||
Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
|
Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
|
||||||
std::chrono::nanoseconds expected_present,
|
std::chrono::nanoseconds expected_present) {
|
||||||
u64 max_frame_number) {
|
|
||||||
std::scoped_lock lock(core->mutex);
|
std::scoped_lock lock(core->mutex);
|
||||||
|
|
||||||
// Check that the consumer doesn't currently have the maximum number of buffers acquired.
|
// Check that the consumer doesn't currently have the maximum number of buffers acquired.
|
||||||
|
@ -50,12 +49,6 @@ Status BufferQueueConsumer::AcquireBuffer(BufferItem* out_buffer,
|
||||||
while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
|
while (core->queue.size() > 1 && !core->queue[0].is_auto_timestamp) {
|
||||||
const auto& buffer_item{core->queue[1]};
|
const auto& buffer_item{core->queue[1]};
|
||||||
|
|
||||||
// If dropping entry[0] would leave us with a buffer that the consumer is not yet ready
|
|
||||||
// for, don't drop it.
|
|
||||||
if (max_frame_number && buffer_item.frame_number > max_frame_number) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If entry[1] is timely, drop entry[0] (and repeat).
|
// If entry[1] is timely, drop entry[0] (and repeat).
|
||||||
const auto desired_present = buffer_item.timestamp;
|
const auto desired_present = buffer_item.timestamp;
|
||||||
if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
|
if (desired_present < expected_present.count() - MAX_REASONABLE_NSEC ||
|
||||||
|
@ -200,4 +193,39 @@ Status BufferQueueConsumer::Connect(std::shared_ptr<IConsumerListener> consumer_
|
||||||
return Status::NoError;
|
return Status::NoError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status BufferQueueConsumer::GetReleasedBuffers(u64* out_slot_mask) {
|
||||||
|
if (out_slot_mask == nullptr) {
|
||||||
|
LOG_ERROR(Service_NVFlinger, "out_slot_mask may not be nullptr");
|
||||||
|
return Status::BadValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::scoped_lock lock(core->mutex);
|
||||||
|
|
||||||
|
if (core->is_abandoned) {
|
||||||
|
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
|
||||||
|
return Status::NoInit;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 mask = 0;
|
||||||
|
for (int s = 0; s < BufferQueueDefs::NUM_BUFFER_SLOTS; ++s) {
|
||||||
|
if (!slots[s].acquire_called) {
|
||||||
|
mask |= (1ULL << s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove from the mask queued buffers for which acquire has been called, since the consumer
|
||||||
|
// will not receive their buffer addresses and so must retain their cached information
|
||||||
|
auto current(core->queue.begin());
|
||||||
|
while (current != core->queue.end()) {
|
||||||
|
if (current->acquire_called) {
|
||||||
|
mask &= ~(1ULL << current->slot);
|
||||||
|
}
|
||||||
|
++current;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_DEBUG(Service_NVFlinger, "returning mask {}", mask);
|
||||||
|
*out_slot_mask = mask;
|
||||||
|
return Status::NoError;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Service::android
|
} // namespace Service::android
|
||||||
|
|
|
@ -24,10 +24,10 @@ public:
|
||||||
explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_);
|
explicit BufferQueueConsumer(std::shared_ptr<BufferQueueCore> core_);
|
||||||
~BufferQueueConsumer();
|
~BufferQueueConsumer();
|
||||||
|
|
||||||
Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present,
|
Status AcquireBuffer(BufferItem* out_buffer, std::chrono::nanoseconds expected_present);
|
||||||
u64 max_frame_number = 0);
|
|
||||||
Status ReleaseBuffer(s32 slot, u64 frame_number, const Fence& release_fence);
|
Status ReleaseBuffer(s32 slot, u64 frame_number, const Fence& release_fence);
|
||||||
Status Connect(std::shared_ptr<IConsumerListener> consumer_listener, bool controlled_by_app);
|
Status Connect(std::shared_ptr<IConsumerListener> consumer_listener, bool controlled_by_app);
|
||||||
|
Status GetReleasedBuffers(u64* out_slot_mask);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<BufferQueueCore> core;
|
std::shared_ptr<BufferQueueCore> core;
|
||||||
|
|
|
@ -95,7 +95,6 @@ void BufferQueueCore::FreeBufferLocked(s32 slot) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferQueueCore::FreeAllBuffersLocked() {
|
void BufferQueueCore::FreeAllBuffersLocked() {
|
||||||
queue.clear();
|
|
||||||
buffer_has_been_queued = false;
|
buffer_has_been_queued = false;
|
||||||
|
|
||||||
for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) {
|
for (s32 slot = 0; slot < BufferQueueDefs::NUM_BUFFER_SLOTS; ++slot) {
|
||||||
|
|
|
@ -73,8 +73,6 @@ private:
|
||||||
u32 transform_hint{};
|
u32 transform_hint{};
|
||||||
bool is_allocating{};
|
bool is_allocating{};
|
||||||
mutable std::condition_variable_any is_allocating_condition;
|
mutable std::condition_variable_any is_allocating_condition;
|
||||||
bool allow_allocation{true};
|
|
||||||
u64 buffer_age{};
|
|
||||||
bool is_shutting_down{};
|
bool is_shutting_down{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -62,11 +62,12 @@ Status BufferQueueProducer::RequestBuffer(s32 slot, std::shared_ptr<GraphicBuffe
|
||||||
|
|
||||||
Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
|
Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
|
||||||
LOG_DEBUG(Service_NVFlinger, "count = {}", buffer_count);
|
LOG_DEBUG(Service_NVFlinger, "count = {}", buffer_count);
|
||||||
std::shared_ptr<IConsumerListener> listener;
|
|
||||||
|
|
||||||
|
std::shared_ptr<IConsumerListener> listener;
|
||||||
{
|
{
|
||||||
std::scoped_lock lock(core->mutex);
|
std::scoped_lock lock(core->mutex);
|
||||||
core->WaitWhileAllocatingLocked();
|
core->WaitWhileAllocatingLocked();
|
||||||
|
|
||||||
if (core->is_abandoned) {
|
if (core->is_abandoned) {
|
||||||
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
|
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
|
||||||
return Status::NoInit;
|
return Status::NoInit;
|
||||||
|
@ -120,7 +121,7 @@ Status BufferQueueProducer::SetBufferCount(s32 buffer_count) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
|
Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
|
||||||
Status* returnFlags) const {
|
Status* return_flags) const {
|
||||||
bool try_again = true;
|
bool try_again = true;
|
||||||
|
|
||||||
while (try_again) {
|
while (try_again) {
|
||||||
|
@ -142,10 +143,12 @@ Status BufferQueueProducer::WaitForFreeSlotThenRelock(bool async, s32* found,
|
||||||
ASSERT(slots[s].buffer_state == BufferState::Free);
|
ASSERT(slots[s].buffer_state == BufferState::Free);
|
||||||
if (slots[s].graphic_buffer != nullptr) {
|
if (slots[s].graphic_buffer != nullptr) {
|
||||||
core->FreeBufferLocked(s);
|
core->FreeBufferLocked(s);
|
||||||
*returnFlags |= Status::ReleaseAllBuffers;
|
*return_flags |= Status::ReleaseAllBuffers;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Look for a free buffer to give to the client
|
||||||
|
*found = BufferQueueCore::INVALID_BUFFER_SLOT;
|
||||||
s32 dequeued_count{};
|
s32 dequeued_count{};
|
||||||
s32 acquired_count{};
|
s32 acquired_count{};
|
||||||
for (s32 s{}; s < max_buffer_count; ++s) {
|
for (s32 s{}; s < max_buffer_count; ++s) {
|
||||||
|
@ -235,68 +238,50 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
|
||||||
{
|
{
|
||||||
std::scoped_lock lock(core->mutex);
|
std::scoped_lock lock(core->mutex);
|
||||||
core->WaitWhileAllocatingLocked();
|
core->WaitWhileAllocatingLocked();
|
||||||
|
|
||||||
if (format == PixelFormat::NoFormat) {
|
if (format == PixelFormat::NoFormat) {
|
||||||
format = core->default_buffer_format;
|
format = core->default_buffer_format;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable the usage bits the consumer requested
|
// Enable the usage bits the consumer requested
|
||||||
usage |= core->consumer_usage_bit;
|
usage |= core->consumer_usage_bit;
|
||||||
|
|
||||||
|
s32 found{};
|
||||||
|
Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags);
|
||||||
|
if (status != Status::NoError) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This should not happen
|
||||||
|
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
|
||||||
|
LOG_ERROR(Service_NVFlinger, "no available buffer slots");
|
||||||
|
return Status::Busy;
|
||||||
|
}
|
||||||
|
|
||||||
|
*out_slot = found;
|
||||||
|
|
||||||
|
attached_by_consumer = slots[found].attached_by_consumer;
|
||||||
|
|
||||||
const bool use_default_size = !width && !height;
|
const bool use_default_size = !width && !height;
|
||||||
if (use_default_size) {
|
if (use_default_size) {
|
||||||
width = core->default_width;
|
width = core->default_width;
|
||||||
height = core->default_height;
|
height = core->default_height;
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 found = BufferItem::INVALID_BUFFER_SLOT;
|
|
||||||
while (found == BufferItem::INVALID_BUFFER_SLOT) {
|
|
||||||
Status status = WaitForFreeSlotThenRelock(async, &found, &return_flags);
|
|
||||||
if (status != Status::NoError) {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This should not happen
|
|
||||||
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
|
|
||||||
LOG_DEBUG(Service_NVFlinger, "no available buffer slots");
|
|
||||||
return Status::Busy;
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
|
|
||||||
|
|
||||||
// If we are not allowed to allocate new buffers, WaitForFreeSlotThenRelock must have
|
|
||||||
// returned a slot containing a buffer. If this buffer would require reallocation to
|
|
||||||
// meet the requested attributes, we free it and attempt to get another one.
|
|
||||||
if (!core->allow_allocation) {
|
|
||||||
if (buffer->NeedsReallocation(width, height, format, usage)) {
|
|
||||||
core->FreeBufferLocked(found);
|
|
||||||
found = BufferItem::INVALID_BUFFER_SLOT;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*out_slot = found;
|
|
||||||
attached_by_consumer = slots[found].attached_by_consumer;
|
|
||||||
slots[found].buffer_state = BufferState::Dequeued;
|
slots[found].buffer_state = BufferState::Dequeued;
|
||||||
|
|
||||||
const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
|
const std::shared_ptr<GraphicBuffer>& buffer(slots[found].graphic_buffer);
|
||||||
|
if ((buffer == nullptr) || (buffer->Width() != width) || (buffer->Height() != height) ||
|
||||||
if ((buffer == nullptr) || buffer->NeedsReallocation(width, height, format, usage)) {
|
(buffer->Format() != format) || ((buffer->Usage() & usage) != usage)) {
|
||||||
slots[found].acquire_called = false;
|
slots[found].acquire_called = false;
|
||||||
slots[found].graphic_buffer = nullptr;
|
slots[found].graphic_buffer = nullptr;
|
||||||
slots[found].request_buffer_called = false;
|
slots[found].request_buffer_called = false;
|
||||||
slots[found].fence = Fence::NoFence();
|
slots[found].fence = Fence::NoFence();
|
||||||
core->buffer_age = 0;
|
|
||||||
return_flags |= Status::BufferNeedsReallocation;
|
return_flags |= Status::BufferNeedsReallocation;
|
||||||
} else {
|
|
||||||
// We add 1 because that will be the frame number when this buffer
|
|
||||||
// is queued
|
|
||||||
core->buffer_age = core->frame_counter + 1 - slots[found].frame_number;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_DEBUG(Service_NVFlinger, "setting buffer age to {}", core->buffer_age);
|
|
||||||
|
|
||||||
*out_fence = slots[found].fence;
|
*out_fence = slots[found].fence;
|
||||||
|
|
||||||
slots[found].fence = Fence::NoFence();
|
slots[found].fence = Fence::NoFence();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,6 +296,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
|
||||||
|
|
||||||
{
|
{
|
||||||
std::scoped_lock lock(core->mutex);
|
std::scoped_lock lock(core->mutex);
|
||||||
|
|
||||||
if (core->is_abandoned) {
|
if (core->is_abandoned) {
|
||||||
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
|
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
|
||||||
return Status::NoInit;
|
return Status::NoInit;
|
||||||
|
@ -327,6 +313,7 @@ Status BufferQueueProducer::DequeueBuffer(s32* out_slot, Fence* out_fence, bool
|
||||||
|
|
||||||
LOG_DEBUG(Service_NVFlinger, "returning slot={} frame={}, flags={}", *out_slot,
|
LOG_DEBUG(Service_NVFlinger, "returning slot={} frame={}, flags={}", *out_slot,
|
||||||
slots[*out_slot].frame_number, return_flags);
|
slots[*out_slot].frame_number, return_flags);
|
||||||
|
|
||||||
return return_flags;
|
return return_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -334,6 +321,7 @@ Status BufferQueueProducer::DetachBuffer(s32 slot) {
|
||||||
LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
|
LOG_DEBUG(Service_NVFlinger, "slot {}", slot);
|
||||||
|
|
||||||
std::scoped_lock lock(core->mutex);
|
std::scoped_lock lock(core->mutex);
|
||||||
|
|
||||||
if (core->is_abandoned) {
|
if (core->is_abandoned) {
|
||||||
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
|
LOG_ERROR(Service_NVFlinger, "BufferQueue has been abandoned");
|
||||||
return Status::NoInit;
|
return Status::NoInit;
|
||||||
|
@ -369,7 +357,6 @@ Status BufferQueueProducer::DetachNextBuffer(std::shared_ptr<GraphicBuffer>* out
|
||||||
}
|
}
|
||||||
|
|
||||||
std::scoped_lock lock(core->mutex);
|
std::scoped_lock lock(core->mutex);
|
||||||
|
|
||||||
core->WaitWhileAllocatingLocked();
|
core->WaitWhileAllocatingLocked();
|
||||||
|
|
||||||
if (core->is_abandoned) {
|
if (core->is_abandoned) {
|
||||||
|
@ -423,6 +410,7 @@ Status BufferQueueProducer::AttachBuffer(s32* out_slot,
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This should not happen
|
||||||
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
|
if (found == BufferQueueCore::INVALID_BUFFER_SLOT) {
|
||||||
LOG_ERROR(Service_NVFlinger, "No available buffer slots");
|
LOG_ERROR(Service_NVFlinger, "No available buffer slots");
|
||||||
return Status::Busy;
|
return Status::Busy;
|
||||||
|
@ -466,8 +454,8 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
|
||||||
return Status::BadValue;
|
return Status::BadValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<IConsumerListener> frameAvailableListener;
|
std::shared_ptr<IConsumerListener> frame_available_listener;
|
||||||
std::shared_ptr<IConsumerListener> frameReplacedListener;
|
std::shared_ptr<IConsumerListener> frame_replaced_listener;
|
||||||
s32 callback_ticket{};
|
s32 callback_ticket{};
|
||||||
BufferItem item;
|
BufferItem item;
|
||||||
|
|
||||||
|
@ -541,12 +529,13 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
|
||||||
item.fence = fence;
|
item.fence = fence;
|
||||||
item.is_droppable = core->dequeue_buffer_cannot_block || async;
|
item.is_droppable = core->dequeue_buffer_cannot_block || async;
|
||||||
item.swap_interval = swap_interval;
|
item.swap_interval = swap_interval;
|
||||||
|
|
||||||
sticky_transform = sticky_transform_;
|
sticky_transform = sticky_transform_;
|
||||||
|
|
||||||
if (core->queue.empty()) {
|
if (core->queue.empty()) {
|
||||||
// When the queue is empty, we can simply queue this buffer
|
// When the queue is empty, we can simply queue this buffer
|
||||||
core->queue.push_back(item);
|
core->queue.push_back(item);
|
||||||
frameAvailableListener = core->consumer_listener;
|
frame_available_listener = core->consumer_listener;
|
||||||
} else {
|
} else {
|
||||||
// When the queue is not empty, we need to look at the front buffer
|
// When the queue is not empty, we need to look at the front buffer
|
||||||
// state to see if we need to replace it
|
// state to see if we need to replace it
|
||||||
|
@ -563,10 +552,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
|
||||||
}
|
}
|
||||||
// Overwrite the droppable buffer with the incoming one
|
// Overwrite the droppable buffer with the incoming one
|
||||||
*front = item;
|
*front = item;
|
||||||
frameReplacedListener = core->consumer_listener;
|
frame_replaced_listener = core->consumer_listener;
|
||||||
} else {
|
} else {
|
||||||
core->queue.push_back(item);
|
core->queue.push_back(item);
|
||||||
frameAvailableListener = core->consumer_listener;
|
frame_available_listener = core->consumer_listener;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -592,10 +581,10 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
|
||||||
callback_condition.wait(callback_mutex);
|
callback_condition.wait(callback_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (frameAvailableListener != nullptr) {
|
if (frame_available_listener != nullptr) {
|
||||||
frameAvailableListener->OnFrameAvailable(item);
|
frame_available_listener->OnFrameAvailable(item);
|
||||||
} else if (frameReplacedListener != nullptr) {
|
} else if (frame_replaced_listener != nullptr) {
|
||||||
frameReplacedListener->OnFrameReplaced(item);
|
frame_replaced_listener->OnFrameReplaced(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
++current_callback_ticket;
|
++current_callback_ticket;
|
||||||
|
@ -669,13 +658,6 @@ Status BufferQueueProducer::Query(NativeWindow what, s32* out_value) {
|
||||||
case NativeWindow::ConsumerUsageBits:
|
case NativeWindow::ConsumerUsageBits:
|
||||||
value = core->consumer_usage_bit;
|
value = core->consumer_usage_bit;
|
||||||
break;
|
break;
|
||||||
case NativeWindow::BufferAge:
|
|
||||||
if (core->buffer_age > INT32_MAX) {
|
|
||||||
value = 0;
|
|
||||||
} else {
|
|
||||||
value = static_cast<u32>(core->buffer_age);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return Status::BadValue;
|
return Status::BadValue;
|
||||||
|
@ -737,7 +719,6 @@ Status BufferQueueProducer::Connect(const std::shared_ptr<IProducerListener>& li
|
||||||
core->buffer_has_been_queued = false;
|
core->buffer_has_been_queued = false;
|
||||||
core->dequeue_buffer_cannot_block =
|
core->dequeue_buffer_cannot_block =
|
||||||
core->consumer_controlled_by_app && producer_controlled_by_app;
|
core->consumer_controlled_by_app && producer_controlled_by_app;
|
||||||
core->allow_allocation = true;
|
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
@ -770,7 +751,7 @@ Status BufferQueueProducer::Disconnect(NativeWindowApi api) {
|
||||||
core->SignalDequeueCondition();
|
core->SignalDequeueCondition();
|
||||||
buffer_wait_event->GetWritableEvent().Signal();
|
buffer_wait_event->GetWritableEvent().Signal();
|
||||||
listener = core->consumer_listener;
|
listener = core->consumer_listener;
|
||||||
} else if (core->connected_api != NativeWindowApi::NoConnectedApi) {
|
} else {
|
||||||
LOG_ERROR(Service_NVFlinger, "still connected to another api (cur = {} req = {})",
|
LOG_ERROR(Service_NVFlinger, "still connected to another api (cur = {} req = {})",
|
||||||
core->connected_api, api);
|
core->connected_api, api);
|
||||||
status = Status::BadValue;
|
status = Status::BadValue;
|
||||||
|
|
|
@ -66,7 +66,7 @@ public:
|
||||||
private:
|
private:
|
||||||
BufferQueueProducer(const BufferQueueProducer&) = delete;
|
BufferQueueProducer(const BufferQueueProducer&) = delete;
|
||||||
|
|
||||||
Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* returnFlags) const;
|
Status WaitForFreeSlotThenRelock(bool async, s32* found, Status* return_flags) const;
|
||||||
|
|
||||||
Kernel::KEvent* buffer_wait_event{};
|
Kernel::KEvent* buffer_wait_event{};
|
||||||
Service::KernelHelpers::ServiceContext& service_context;
|
Service::KernelHelpers::ServiceContext& service_context;
|
||||||
|
|
|
@ -36,38 +36,41 @@ void ConsumerBase::FreeBufferLocked(s32 slot_index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConsumerBase::OnFrameAvailable(const BufferItem& item) {
|
void ConsumerBase::OnFrameAvailable(const BufferItem& item) {
|
||||||
std::scoped_lock lock(mutex);
|
|
||||||
LOG_DEBUG(Service_NVFlinger, "called");
|
LOG_DEBUG(Service_NVFlinger, "called");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConsumerBase::OnFrameReplaced(const BufferItem& item) {
|
void ConsumerBase::OnFrameReplaced(const BufferItem& item) {
|
||||||
std::scoped_lock lock(mutex);
|
|
||||||
LOG_DEBUG(Service_NVFlinger, "called");
|
LOG_DEBUG(Service_NVFlinger, "called");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConsumerBase::OnBuffersReleased() {
|
void ConsumerBase::OnBuffersReleased() {
|
||||||
std::scoped_lock lock(mutex);
|
std::scoped_lock lock(mutex);
|
||||||
|
|
||||||
LOG_DEBUG(Service_NVFlinger, "called");
|
LOG_DEBUG(Service_NVFlinger, "called");
|
||||||
|
|
||||||
|
if (is_abandoned) {
|
||||||
|
// Nothing to do if we're already abandoned.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 mask = 0;
|
||||||
|
consumer->GetReleasedBuffers(&mask);
|
||||||
|
for (int i = 0; i < BufferQueueDefs::NUM_BUFFER_SLOTS; i++) {
|
||||||
|
if (mask & (1ULL << i)) {
|
||||||
|
FreeBufferLocked(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConsumerBase::OnSidebandStreamChanged() {}
|
void ConsumerBase::OnSidebandStreamChanged() {}
|
||||||
|
|
||||||
Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when,
|
Status ConsumerBase::AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when) {
|
||||||
u64 max_frame_number) {
|
Status err = consumer->AcquireBuffer(item, present_when);
|
||||||
if (is_abandoned) {
|
|
||||||
LOG_ERROR(Service_NVFlinger, "consumer is abandoned!");
|
|
||||||
return Status::NoInit;
|
|
||||||
}
|
|
||||||
|
|
||||||
Status err = consumer->AcquireBuffer(item, present_when, max_frame_number);
|
|
||||||
if (err != Status::NoError) {
|
if (err != Status::NoError) {
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (item->graphic_buffer != nullptr) {
|
if (item->graphic_buffer != nullptr) {
|
||||||
if (slots[item->slot].graphic_buffer != nullptr) {
|
|
||||||
FreeBufferLocked(item->slot);
|
|
||||||
}
|
|
||||||
slots[item->slot].graphic_buffer = item->graphic_buffer;
|
slots[item->slot].graphic_buffer = item->graphic_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -35,8 +35,7 @@ protected:
|
||||||
virtual void OnSidebandStreamChanged() override;
|
virtual void OnSidebandStreamChanged() override;
|
||||||
|
|
||||||
void FreeBufferLocked(s32 slot_index);
|
void FreeBufferLocked(s32 slot_index);
|
||||||
Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when,
|
Status AcquireBufferLocked(BufferItem* item, std::chrono::nanoseconds present_when);
|
||||||
u64 max_frame_number = 0);
|
|
||||||
Status ReleaseBufferLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer);
|
Status ReleaseBufferLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer);
|
||||||
bool StillTracking(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer) const;
|
bool StillTracking(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer) const;
|
||||||
Status AddReleaseFenceLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer,
|
Status AddReleaseFenceLocked(s32 slot, const std::shared_ptr<GraphicBuffer> graphic_buffer,
|
||||||
|
|
|
@ -104,7 +104,7 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
|
||||||
std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
|
std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
|
||||||
const auto lock_guard = Lock();
|
const auto lock_guard = Lock();
|
||||||
|
|
||||||
LOG_DEBUG(Service, "Opening \"{}\" display", name);
|
LOG_DEBUG(Service_NVFlinger, "Opening \"{}\" display", name);
|
||||||
|
|
||||||
const auto itr =
|
const auto itr =
|
||||||
std::find_if(displays.begin(), displays.end(),
|
std::find_if(displays.begin(), displays.end(),
|
||||||
|
@ -219,7 +219,7 @@ VI::Layer* NVFlinger::FindOrCreateLayer(u64 display_id, u64 layer_id) {
|
||||||
auto* layer = display->FindLayer(layer_id);
|
auto* layer = display->FindLayer(layer_id);
|
||||||
|
|
||||||
if (layer == nullptr) {
|
if (layer == nullptr) {
|
||||||
LOG_DEBUG(Service, "Layer at id {} not found. Trying to create it.", layer_id);
|
LOG_DEBUG(Service_NVFlinger, "Layer at id {} not found. Trying to create it.", layer_id);
|
||||||
CreateLayerAtId(*display, layer_id);
|
CreateLayerAtId(*display, layer_id);
|
||||||
return display->FindLayer(layer_id);
|
return display->FindLayer(layer_id);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,21 +39,9 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std
|
||||||
return gpu_addr;
|
return gpu_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::UnmapSubmappedRanges(GPUVAddr gpu_addr, std::size_t size) {
|
|
||||||
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
|
|
||||||
for (const auto& [map_addr, map_size] : submapped_ranges) {
|
|
||||||
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
|
||||||
const std::optional<VAddr> cpu_vaddr = GpuToCpuAddress(map_addr);
|
|
||||||
if (!cpu_vaddr) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
rasterizer->UnmapMemory(*cpu_vaddr, map_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
|
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
|
||||||
// Unmap any pre-existing rasterizer memory in this range
|
// Mark any pre-existing rasterizer memory in this range as remapped
|
||||||
UnmapSubmappedRanges(gpu_addr, size);
|
rasterizer->ModifyGPUMemory(gpu_addr, size);
|
||||||
|
|
||||||
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
|
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
|
||||||
if (it != map_ranges.end() && it->first == gpu_addr) {
|
if (it != map_ranges.end() && it->first == gpu_addr) {
|
||||||
|
@ -85,8 +73,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
|
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
|
||||||
}
|
}
|
||||||
|
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
|
||||||
|
|
||||||
|
for (const auto& [map_addr, map_size] : submapped_ranges) {
|
||||||
|
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
||||||
|
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
|
||||||
|
ASSERT(cpu_addr);
|
||||||
|
|
||||||
|
rasterizer->UnmapMemory(*cpu_addr, map_size);
|
||||||
|
}
|
||||||
|
|
||||||
UnmapSubmappedRanges(gpu_addr, size);
|
|
||||||
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
|
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -149,7 +149,6 @@ private:
|
||||||
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
|
[[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
|
||||||
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
|
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
|
||||||
GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
|
GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
|
||||||
void UnmapSubmappedRanges(GPUVAddr gpu_addr, std::size_t size);
|
|
||||||
[[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
|
[[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
|
||||||
bool start_32bit_address = false) const;
|
bool start_32bit_address = false) const;
|
||||||
|
|
||||||
|
|
|
@ -454,15 +454,20 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
||||||
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
|
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
|
||||||
});
|
});
|
||||||
for (const ImageId image_id : images) {
|
for (const ImageId image_id : images) {
|
||||||
Image& image = slot_images[image_id];
|
DownloadImage(image_id);
|
||||||
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
|
||||||
const auto copies = FullDownloadCopies(image.info);
|
|
||||||
image.DownloadMemory(map, copies);
|
|
||||||
runtime.Finish();
|
|
||||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::DownloadImage(ImageId image_id) {
|
||||||
|
Image& image = slot_images[image_id];
|
||||||
|
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
||||||
|
const auto copies = FullDownloadCopies(image.info);
|
||||||
|
image.DownloadMemory(map, copies);
|
||||||
|
runtime.Finish();
|
||||||
|
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||||
std::vector<ImageId> deleted_images;
|
std::vector<ImageId> deleted_images;
|
||||||
|
@ -1058,7 +1063,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
for (const ImageId overlap_id : ignore_textures) {
|
for (const ImageId overlap_id : ignore_textures) {
|
||||||
Image& overlap = slot_images[overlap_id];
|
Image& overlap = slot_images[overlap_id];
|
||||||
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
||||||
UNIMPLEMENTED();
|
DownloadImage(overlap_id);
|
||||||
}
|
}
|
||||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||||
UntrackImage(overlap, overlap_id);
|
UntrackImage(overlap, overlap_id);
|
||||||
|
|
|
@ -139,6 +139,9 @@ public:
|
||||||
/// Download contents of host images to guest memory in a region
|
/// Download contents of host images to guest memory in a region
|
||||||
void DownloadMemory(VAddr cpu_addr, size_t size);
|
void DownloadMemory(VAddr cpu_addr, size_t size);
|
||||||
|
|
||||||
|
/// Download contents of host images to guest memory
|
||||||
|
void DownloadImage(ImageId image_id);
|
||||||
|
|
||||||
/// Remove images in a region
|
/// Remove images in a region
|
||||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,11 @@
|
||||||
<string>Unsafe</string>
|
<string>Unsafe</string>
|
||||||
</property>
|
</property>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
|
||||||
|
<property name="text">
|
||||||
|
<string>Paranoid (disables most optimizations)</string>
|
||||||
|
</property>
|
||||||
|
</item>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
|
|
|
@ -342,12 +342,6 @@ fps_cap =
|
||||||
# null: No audio output
|
# null: No audio output
|
||||||
output_engine =
|
output_engine =
|
||||||
|
|
||||||
# Whether or not to enable the audio-stretching post-processing effect.
|
|
||||||
# This effect adjusts audio speed to match emulation speed and helps prevent audio stutter,
|
|
||||||
# at the cost of increasing audio latency.
|
|
||||||
# 0: No, 1 (default): Yes
|
|
||||||
enable_audio_stretching =
|
|
||||||
|
|
||||||
# Which audio device to use.
|
# Which audio device to use.
|
||||||
# auto (default): Auto-select
|
# auto (default): Auto-select
|
||||||
output_device =
|
output_device =
|
||||||
|
|
Loading…
Reference in a new issue