early-access version 3591

This commit is contained in:
pineappleEA 2023-05-15 04:33:04 +02:00
parent 6d16ab8a99
commit d18469456b
14 changed files with 244 additions and 176 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 3589. This is the source code for early-access 3591.
## Legal Notice ## Legal Notice

View file

@ -188,6 +188,8 @@ void AudioRenderer::ThreadFunc() {
max_time = std::min(command_buffer.time_limit, max_time); max_time = std::min(command_buffer.time_limit, max_time);
command_list_processor.SetProcessTimeMax(max_time); command_list_processor.SetProcessTimeMax(max_time);
streams[index]->WaitFreeSpace();
// Process the command list // Process the command list
{ {
MICROPROFILE_SCOPE(Audio_Renderer); MICROPROFILE_SCOPE(Audio_Renderer);

View file

@ -15,14 +15,9 @@ MICROPROFILE_DEFINE(Audio_RenderSystemManager, "Audio", "Render System Manager",
MP_RGB(60, 19, 97)); MP_RGB(60, 19, 97));
namespace AudioCore::AudioRenderer { namespace AudioCore::AudioRenderer {
constexpr std::chrono::nanoseconds RENDER_TIME{5'000'000UL};
SystemManager::SystemManager(Core::System& core_) SystemManager::SystemManager(Core::System& core_)
: core{core_}, adsp{core.AudioCore().GetADSP()}, mailbox{adsp.GetRenderMailbox()}, : core{core_}, adsp{core.AudioCore().GetADSP()}, mailbox{adsp.GetRenderMailbox()} {}
thread_event{Core::Timing::CreateEvent(
"AudioRendererSystemManager", [this](std::uintptr_t, s64 time, std::chrono::nanoseconds) {
return ThreadFunc2(time);
})} {}
SystemManager::~SystemManager() { SystemManager::~SystemManager() {
Stop(); Stop();
@ -32,9 +27,7 @@ bool SystemManager::InitializeUnsafe() {
if (!active) { if (!active) {
if (adsp.Start()) { if (adsp.Start()) {
active = true; active = true;
thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(stop_token); }); thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(); });
core.CoreTiming().ScheduleLoopingEvent(std::chrono::nanoseconds(0), RENDER_TIME,
thread_event);
} }
} }
@ -45,13 +38,9 @@ void SystemManager::Stop() {
if (!active) { if (!active) {
return; return;
} }
core.CoreTiming().UnscheduleEvent(thread_event, {});
active = false; active = false;
{ update.store(true);
std::scoped_lock l{cv_mutex}; update.notify_all();
do_update = false;
}
thread.request_stop();
thread.join(); thread.join();
adsp.Stop(); adsp.Stop();
} }
@ -96,12 +85,12 @@ bool SystemManager::Remove(System& system_) {
return true; return true;
} }
void SystemManager::ThreadFunc(std::stop_token stop_token) { void SystemManager::ThreadFunc() {
static constexpr char name[]{"AudioRenderSystemManager"}; static constexpr char name[]{"AudioRenderSystemManager"};
MicroProfileOnThreadCreate(name); MicroProfileOnThreadCreate(name);
Common::SetCurrentThreadName(name); Common::SetCurrentThreadName(name);
Common::SetCurrentThreadPriority(Common::ThreadPriority::High); Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
while (active && !stop_token.stop_requested()) { while (active) {
{ {
std::scoped_lock l{mutex1}; std::scoped_lock l{mutex1};
@ -114,20 +103,7 @@ void SystemManager::ThreadFunc(std::stop_token stop_token) {
adsp.Signal(); adsp.Signal();
adsp.Wait(); adsp.Wait();
std::unique_lock l{cv_mutex};
Common::CondvarWait(update_cv, l, stop_token, [this]() { return do_update; });
do_update = false;
} }
} }
std::optional<std::chrono::nanoseconds> SystemManager::ThreadFunc2(s64 time) {
{
std::scoped_lock l{cv_mutex};
do_update = true;
}
update_cv.notify_all();
return std::nullopt;
}
} // namespace AudioCore::AudioRenderer } // namespace AudioCore::AudioRenderer

View file

@ -66,12 +66,13 @@ private:
/** /**
* Main thread responsible for command generation. * Main thread responsible for command generation.
*/ */
void ThreadFunc(std::stop_token stop_token); void ThreadFunc();
/** enum class StreamState {
* Signalling core timing thread to run ThreadFunc. Filling,
*/ Steady,
std::optional<std::chrono::nanoseconds> ThreadFunc2(s64 time); Draining,
};
/// Core system /// Core system
Core::System& core; Core::System& core;
@ -89,12 +90,8 @@ private:
ADSP::ADSP& adsp; ADSP::ADSP& adsp;
/// AudioRenderer mailbox for communication /// AudioRenderer mailbox for communication
ADSP::AudioRenderer_Mailbox* mailbox{}; ADSP::AudioRenderer_Mailbox* mailbox{};
/// Core timing event to signal main thread
std::shared_ptr<Core::Timing::EventType> thread_event;
/// Atomic for main thread to wait on /// Atomic for main thread to wait on
std::mutex cv_mutex{}; std::atomic<bool> update{};
bool do_update{};
std::condition_variable_any update_cv{};
}; };
} // namespace AudioCore::AudioRenderer } // namespace AudioCore::AudioRenderer

View file

@ -268,4 +268,10 @@ u64 SinkStream::GetExpectedPlayedSampleCount() {
return std::min<u64>(exp_played_sample_count, max_played_sample_count) + TargetSampleCount * 3; return std::min<u64>(exp_played_sample_count, max_played_sample_count) + TargetSampleCount * 3;
} }
void SinkStream::WaitFreeSpace() {
std::unique_lock lk{release_mutex};
release_cv.wait(
lk, [this]() { return queued_buffers < max_queue_size || system.IsShuttingDown(); });
}
} // namespace AudioCore::Sink } // namespace AudioCore::Sink

View file

@ -207,6 +207,11 @@ public:
*/ */
u64 GetExpectedPlayedSampleCount(); u64 GetExpectedPlayedSampleCount();
/**
* Waits for free space in the sample ring buffer
*/
void WaitFreeSpace();
protected: protected:
/// Core system /// Core system
Core::System& system; Core::System& system;

View file

@ -32,6 +32,7 @@ public:
bool IsEnabled() const; bool IsEnabled() const;
private: private:
// Number of times the function will be delayed until it outputs valid data
static constexpr std::size_t AMIIBO_UPDATE_DELAY = 15; static constexpr std::size_t AMIIBO_UPDATE_DELAY = 15;
struct TagFoundData { struct TagFoundData {

View file

@ -23,42 +23,94 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
common_ranges.clear(); common_ranges.clear();
inline_buffer_id = NULL_BUFFER_ID; inline_buffer_id = NULL_BUFFER_ID;
if (!runtime.CanReportMemoryUsage()) {
minimum_memory = DEFAULT_EXPECTED_MEMORY;
critical_memory = DEFAULT_CRITICAL_MEMORY;
return;
}
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; const u64 device_mem_per = device_memory / 100;
const s64 min_spacing_critical = device_memory - 1_GiB; minimum_memory = device_mem_per * 25;
const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); expected_memory = device_mem_per * 50;
const s64 min_vacancy_expected = (6 * mem_threshold) / 10; critical_memory = device_mem_per * 80;
const s64 min_vacancy_critical = (3 * mem_threshold) / 10; LOG_INFO(HW_GPU, "Buffer cache device memory limits: min {} expected {} critical {}",
minimum_memory = static_cast<u64>( minimum_memory, expected_memory, critical_memory);
std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
DEFAULT_EXPECTED_MEMORY));
critical_memory = static_cast<u64>(
std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
DEFAULT_CRITICAL_MEMORY));
} }
template <class P> template <class P>
void BufferCache<P>::RunGarbageCollector() { void BufferCache<P>::RunGarbageCollector() {
const bool aggressive_gc = total_used_memory >= critical_memory; if (total_used_memory < minimum_memory) {
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; return;
int num_iterations = aggressive_gc ? 64 : 32; }
const auto clean_up = [this, &num_iterations](BufferId buffer_id) { bool is_expected = total_used_memory >= expected_memory;
bool is_critical = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = is_critical ? 60ULL : is_expected ? 120ULL : 240ULL;
size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10);
boost::container::small_vector<std::pair<BufferId, VideoCommon::BufferCopies>, 40> to_delete;
u64 total_size{0};
const auto clean_up = [&](BufferId buffer_id) {
if (num_iterations == 0) { if (num_iterations == 0) {
return true; return true;
} }
--num_iterations; --num_iterations;
auto& buffer = slot_buffers[buffer_id]; auto& buffer = slot_buffers[buffer_id];
DownloadBufferMemory(buffer); auto buffer_copies = FullDownloadCopies(buffer, buffer.CpuAddr(), buffer.SizeBytes());
DeleteBuffer(buffer_id); total_size += buffer_copies.total_size;
to_delete.push_back({buffer_id, std::move(buffer_copies)});
return false; return false;
}; };
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
if (total_size > 0) {
if constexpr (USE_MEMORY_MAPS) {
auto map = runtime.DownloadStagingBuffer(Common::AlignUp(total_size, 1024));
auto base_offset = map.offset;
for (auto& [buffer_id, buffer_copies] : to_delete) {
if (buffer_copies.total_size == 0) {
continue;
}
for (auto& copy : buffer_copies.copies) {
copy.dst_offset += map.offset;
}
auto& buffer = slot_buffers[buffer_id];
runtime.CopyBuffer(map.buffer, buffer, buffer_copies.copies);
map.offset += buffer_copies.total_size;
}
runtime.Finish();
for (auto& [buffer_id, buffer_copies] : to_delete) {
if (buffer_copies.total_size > 0) {
auto& buffer = slot_buffers[buffer_id];
for (const auto& copy : buffer_copies.copies) {
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
const u8* copy_mapped_memory =
map.mapped_span.data() + copy.dst_offset - base_offset;
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
}
}
DeleteBuffer(buffer_id);
}
} else {
for (auto& [buffer_id, buffer_copies] : to_delete) {
if (buffer_copies.total_size == 0) {
continue;
}
const std::span<u8> immediate_buffer = ImmediateBuffer(buffer_copies.total_size);
auto& buffer = slot_buffers[buffer_id];
for (const BufferCopy& copy : buffer_copies.copies) {
buffer.ImmediateDownload(copy.src_offset,
immediate_buffer.subspan(0, copy.size));
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
}
DeleteBuffer(buffer_id);
}
}
} else {
for (auto& [buffer_id, buffer_copies] : to_delete) {
DeleteBuffer(buffer_id);
}
}
} }
template <class P> template <class P>
@ -77,12 +129,10 @@ void BufferCache<P>::TickFrame() {
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
// If we can obtain the memory info, use it instead of the estimate. // If we can obtain the memory info, use it instead of the estimate.
if (runtime.CanReportMemoryUsage()) { if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) {
total_used_memory = runtime.GetDeviceMemoryUsage(); total_used_memory = runtime.GetDeviceMemoryUsage();
} }
if (total_used_memory >= minimum_memory) { RunGarbageCollector();
RunGarbageCollector();
}
++frame_tick; ++frame_tick;
delayed_destruction_ring.Tick(); delayed_destruction_ring.Tick();
@ -1536,17 +1586,13 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
} }
template <class P> template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { VideoCommon::BufferCopies BufferCache<P>::FullDownloadCopies(Buffer& buffer, VAddr cpu_addr,
DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); u64 size, bool clear) {
} boost::container::small_vector<BufferCopy, 16> copies;
template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
boost::container::small_vector<BufferCopy, 1> copies;
u64 total_size_bytes = 0; u64 total_size_bytes = 0;
u64 largest_copy = 0; u64 largest_copy = 0;
memory_tracker.ForEachDownloadRangeAndClear( memory_tracker.ForEachDownloadRange(
cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { cpu_addr, size, clear, [&](u64 cpu_addr_out, u64 range_size) {
const VAddr buffer_addr = buffer.CpuAddr(); const VAddr buffer_addr = buffer.CpuAddr();
const auto add_download = [&](VAddr start, VAddr end) { const auto add_download = [&](VAddr start, VAddr end) {
const u64 new_offset = start - buffer_addr; const u64 new_offset = start - buffer_addr;
@ -1570,22 +1616,35 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
ClearDownload(subtract_interval); ClearDownload(subtract_interval);
common_ranges.subtract(subtract_interval); common_ranges.subtract(subtract_interval);
}); });
if (total_size_bytes == 0) { return {total_size_bytes, largest_copy, std::move(copies)};
}
template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
}
template <class P>
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
auto buffer_copies = FullDownloadCopies(buffer, cpu_addr, size);
if (buffer_copies.total_size == 0) {
return; return;
} }
MICROPROFILE_SCOPE(GPU_DownloadMemory); MICROPROFILE_SCOPE(GPU_DownloadMemory);
if constexpr (USE_MEMORY_MAPS) { if constexpr (USE_MEMORY_MAPS) {
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); auto download_staging = runtime.DownloadStagingBuffer(buffer_copies.total_size);
const u8* const mapped_memory = download_staging.mapped_span.data(); const u8* const mapped_memory = download_staging.mapped_span.data();
const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); const std::span<BufferCopy> copies_span(buffer_copies.copies.data(),
for (BufferCopy& copy : copies) { buffer_copies.copies.size());
for (BufferCopy& copy : buffer_copies.copies) {
// Modify copies to have the staging offset in mind // Modify copies to have the staging offset in mind
copy.dst_offset += download_staging.offset; copy.dst_offset += download_staging.offset;
} }
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
runtime.Finish(); runtime.Finish();
for (const BufferCopy& copy : copies) { for (const BufferCopy& copy : buffer_copies.copies) {
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
// Undo the modified offset // Undo the modified offset
const u64 dst_offset = copy.dst_offset - download_staging.offset; const u64 dst_offset = copy.dst_offset - download_staging.offset;
@ -1593,8 +1652,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
} }
} else { } else {
const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); const std::span<u8> immediate_buffer = ImmediateBuffer(buffer_copies.largest_copy);
for (const BufferCopy& copy : copies) { for (const BufferCopy& copy : buffer_copies.copies) {
buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);

View file

@ -57,8 +57,6 @@ MICROPROFILE_DECLARE(GPU_PrepareBuffers);
MICROPROFILE_DECLARE(GPU_BindUploadBuffers); MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
MICROPROFILE_DECLARE(GPU_DownloadMemory); MICROPROFILE_DECLARE(GPU_DownloadMemory);
using BufferId = SlotId;
using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormat;
using namespace Common::Literals; using namespace Common::Literals;
@ -464,6 +462,9 @@ private:
void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
[[nodiscard]] VideoCommon::BufferCopies FullDownloadCopies(Buffer& buffer, VAddr cpu_addr,
u64 size, bool clear = true);
void DownloadBufferMemory(Buffer& buffer_id); void DownloadBufferMemory(Buffer& buffer_id);
void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
@ -566,6 +567,7 @@ private:
u64 frame_tick = 0; u64 frame_tick = 0;
u64 total_used_memory = 0; u64 total_used_memory = 0;
u64 minimum_memory = 0; u64 minimum_memory = 0;
u64 expected_memory = 0;
u64 critical_memory = 0; u64 critical_memory = 0;
BufferId inline_buffer_id; BufferId inline_buffer_id;

View file

@ -47,35 +47,31 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
void(slot_image_views.insert(runtime, NullImageViewParams{})); void(slot_image_views.insert(runtime, NullImageViewParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor)); void(slot_samplers.insert(runtime, sampler_descriptor));
if constexpr (HAS_DEVICE_MEMORY_INFO) { const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); const u64 device_mem_per = device_memory / 100;
const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; minimum_memory = device_mem_per * 25;
const s64 min_spacing_critical = device_memory - 1_GiB; expected_memory = device_mem_per * 50;
const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); critical_memory = device_mem_per * 80;
const s64 min_vacancy_expected = (6 * mem_threshold) / 10; LOG_INFO(HW_GPU, "Texture cache device memory limits: min {} expected {} critical {}",
const s64 min_vacancy_critical = (3 * mem_threshold) / 10; minimum_memory, expected_memory, critical_memory);
expected_memory = static_cast<u64>(
std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
DEFAULT_EXPECTED_MEMORY));
critical_memory = static_cast<u64>(
std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
DEFAULT_CRITICAL_MEMORY));
minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2);
} else {
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
minimum_memory = 0;
}
} }
template <class P> template <class P>
void TextureCache<P>::RunGarbageCollector() { void TextureCache<P>::RunGarbageCollector() {
bool high_priority_mode = total_used_memory >= expected_memory; if (total_used_memory < minimum_memory) {
bool aggressive_mode = total_used_memory >= critical_memory; return;
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; }
size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); bool is_expected = total_used_memory >= expected_memory;
const auto clean_up = [this, &num_iterations, &high_priority_mode, bool is_critical = total_used_memory >= critical_memory;
&aggressive_mode](ImageId image_id) { const u64 ticks_to_destroy = is_critical ? 10ULL : is_expected ? 25ULL : 50ULL;
size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10);
boost::container::small_vector<
std::tuple<ImageId, bool, boost::container::small_vector<BufferImageCopy, 16>>, 40>
to_delete;
u64 total_download_size{0};
u32 largest_download_size{0};
const auto clean_up = [&](ImageId image_id) {
if (num_iterations == 0) { if (num_iterations == 0) {
return true; return true;
} }
@ -86,51 +82,70 @@ void TextureCache<P>::RunGarbageCollector() {
// used by the async decoder thread. // used by the async decoder thread.
return false; return false;
} }
const bool must_download =
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); const bool do_download = image.IsSafeDownload() &&
if (!high_priority_mode && False(image.flags & ImageFlagBits::BadOverlap) &&
(must_download || True(image.flags & ImageFlagBits::CostlyLoad))) { (False(image.flags & ImageFlagBits::CostlyLoad) || is_critical);
return false; if (do_download) {
} total_download_size += image.unswizzled_size_bytes;
if (must_download) { largest_download_size = std::max(largest_download_size, image.unswizzled_size_bytes);
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
swizzle_data_buffer);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
if (total_used_memory < critical_memory) {
if (aggressive_mode) {
// Sink the aggresiveness.
num_iterations >>= 2;
aggressive_mode = false;
return false;
}
if (high_priority_mode && total_used_memory < expected_memory) {
num_iterations >>= 1;
high_priority_mode = false;
}
} }
to_delete.push_back({image_id, do_download, {}});
return false; return false;
}; };
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
if (total_download_size > 0) {
auto map = runtime.DownloadStagingBuffer(total_download_size);
for (auto& [image_id, do_download, copies] : to_delete) {
if (!do_download) {
continue;
}
Image& image = slot_images[image_id];
copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
runtime.Finish();
swizzle_data_buffer.resize_destructive(Common::AlignUp(largest_download_size, 1024));
u64 offset{0};
for (auto& [image_id, do_download, copies] : to_delete) {
Image& image = slot_images[image_id];
if (do_download) {
for (auto& copy : copies) {
copy.buffer_offset += offset;
}
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
swizzle_data_buffer);
offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
}
} else {
for (auto& [image_id, do_download, copies] : to_delete) {
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
}
}
} }
template <class P> template <class P>
void TextureCache<P>::TickFrame() { void TextureCache<P>::TickFrame() {
// If we can obtain the memory info, use it instead of the estimate. // If we can obtain the memory info, use it instead of the estimate.
if (runtime.CanReportMemoryUsage()) { if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) {
total_used_memory = runtime.GetDeviceMemoryUsage(); total_used_memory = runtime.GetDeviceMemoryUsage();
} }
if (total_used_memory > minimum_memory) { RunGarbageCollector();
RunGarbageCollector();
}
sentenced_images.Tick(); sentenced_images.Tick();
sentenced_framebuffers.Tick(); sentenced_framebuffers.Tick();
sentenced_image_view.Tick(); sentenced_image_view.Tick();
@ -1397,6 +1412,27 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
return lhs_image.modification_tick < rhs_image.modification_tick; return lhs_image.modification_tick < rhs_image.modification_tick;
}); });
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
new_image.flags |= ImageFlagBits::GpuModified;
const auto& resolution = Settings::values.resolution_info;
const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
const u32 up_scale = can_rescale ? resolution.up_scale : 1;
const u32 down_shift = can_rescale ? resolution.down_shift : 0;
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
if (overlap.info.num_samples != new_image.info.num_samples) {
runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
} else {
runtime.CopyImage(new_image, overlap, std::move(copies));
}
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);
}
UnregisterImage(overlap_id);
DeleteImage(overlap_id);
}
ImageBase& new_image_base = new_image; ImageBase& new_image_base = new_image;
for (const ImageId aliased_id : right_aliased_ids) { for (const ImageId aliased_id : right_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id]; ImageBase& aliased = slot_images[aliased_id];
@ -1419,33 +1455,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
new_image.flags |= ImageFlagBits::BadOverlap; new_image.flags |= ImageFlagBits::BadOverlap;
} }
} }
SynchronizeAliases(new_image_id);
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified) &&
overlap.modification_tick > new_image.modification_tick) {
new_image.flags |= ImageFlagBits::GpuModified;
const auto& resolution = Settings::values.resolution_info;
const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
const u32 up_scale = can_rescale ? resolution.up_scale : 1;
const u32 down_shift = can_rescale ? resolution.down_shift : 0;
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
if (overlap.info.num_samples != new_image.info.num_samples) {
runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
} else {
runtime.CopyImage(new_image, overlap, std::move(copies));
}
new_image.modification_tick = overlap.modification_tick;
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);
}
UnregisterImage(overlap_id);
DeleteImage(overlap_id);
}
RegisterImage(new_image_id); RegisterImage(new_image_id);
return new_image_id; return new_image_id;
} }

View file

@ -3,6 +3,8 @@
#pragma once #pragma once
#include <boost/container/small_vector.hpp>
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/slot_vector.h"
@ -14,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
constexpr SlotId CORRUPT_ID{0xfffffffe}; constexpr SlotId CORRUPT_ID{0xfffffffe};
using BufferId = SlotId;
using ImageId = SlotId; using ImageId = SlotId;
using ImageMapId = SlotId; using ImageMapId = SlotId;
using ImageViewId = SlotId; using ImageViewId = SlotId;
@ -146,6 +149,12 @@ struct BufferCopy {
size_t size; size_t size;
}; };
struct BufferCopies {
u64 total_size;
u64 largest_copy;
boost::container::small_vector<BufferCopy, 16> copies;
};
struct SwizzleParameters { struct SwizzleParameters {
Extent3D num_tiles; Extent3D num_tiles;
Extent3D block; Extent3D block;

View file

@ -914,7 +914,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
} }
} }
std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
const Extent3D size = info.size; const Extent3D size = info.size;
const u32 bytes_per_block = BytesPerBlock(info.format); const u32 bytes_per_block = BytesPerBlock(info.format);
if (info.type == ImageType::Linear) { if (info.type == ImageType::Linear) {
@ -942,7 +942,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
u32 host_offset = 0; u32 host_offset = 0;
std::vector<BufferImageCopy> copies(num_levels); boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) { for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level); const Extent3D level_size = AdjustMipSize(size, level);
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);

View file

@ -5,6 +5,7 @@
#include <optional> #include <optional>
#include <span> #include <span>
#include <boost/container/small_vector.hpp>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
@ -73,7 +74,8 @@ struct OverlapResult {
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies); std::span<BufferImageCopy> copies);
[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
const ImageInfo& info);
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);

View file

@ -1017,7 +1017,7 @@ void Device::CollectPhysicalMemoryInfo() {
device_access_memory += mem_properties.memoryHeaps[element].size; device_access_memory += mem_properties.memoryHeaps[element].size;
} }
if (!is_integrated) { if (!is_integrated) {
const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 2_GiB); const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB);
device_access_memory -= reserve_memory; device_access_memory -= reserve_memory;
return; return;
} }