From d18469456b58238c5bac306dff1cd04f1f5d58b7 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 15 May 2023 04:33:04 +0200 Subject: [PATCH] early-access version 3591 --- README.md | 2 +- .../renderer/adsp/audio_renderer.cpp | 2 + src/audio_core/renderer/system_manager.cpp | 36 +--- src/audio_core/renderer/system_manager.h | 17 +- src/audio_core/sink/sink_stream.cpp | 6 + src/audio_core/sink/sink_stream.h | 5 + .../helpers/joycon_protocol/nfc.h | 1 + src/video_core/buffer_cache/buffer_cache.h | 145 +++++++++----- .../buffer_cache/buffer_cache_base.h | 6 +- src/video_core/texture_cache/texture_cache.h | 181 +++++++++--------- src/video_core/texture_cache/types.h | 9 + src/video_core/texture_cache/util.cpp | 4 +- src/video_core/texture_cache/util.h | 4 +- .../vulkan_common/vulkan_device.cpp | 2 +- 14 files changed, 244 insertions(+), 176 deletions(-) diff --git a/README.md b/README.md index d7ddc0698..14a647f05 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3589. +This is the source code for early-access 3591. ## Legal Notice diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp index 3ba1cfc55..fcf49b299 100755 --- a/src/audio_core/renderer/adsp/audio_renderer.cpp +++ b/src/audio_core/renderer/adsp/audio_renderer.cpp @@ -188,6 +188,8 @@ void AudioRenderer::ThreadFunc() { max_time = std::min(command_buffer.time_limit, max_time); command_list_processor.SetProcessTimeMax(max_time); + streams[index]->WaitFreeSpace(); + // Process the command list { MICROPROFILE_SCOPE(Audio_Renderer); diff --git a/src/audio_core/renderer/system_manager.cpp b/src/audio_core/renderer/system_manager.cpp index cc59a2883..5f96579b8 100755 --- a/src/audio_core/renderer/system_manager.cpp +++ b/src/audio_core/renderer/system_manager.cpp @@ -15,14 +15,9 @@ MICROPROFILE_DEFINE(Audio_RenderSystemManager, "Audio", "Render System Manager", MP_RGB(60, 19, 97)); namespace AudioCore::AudioRenderer { -constexpr std::chrono::nanoseconds RENDER_TIME{5'000'000UL}; SystemManager::SystemManager(Core::System& core_) - : core{core_}, adsp{core.AudioCore().GetADSP()}, mailbox{adsp.GetRenderMailbox()}, - thread_event{Core::Timing::CreateEvent( - "AudioRendererSystemManager", [this](std::uintptr_t, s64 time, std::chrono::nanoseconds) { - return ThreadFunc2(time); - })} {} + : core{core_}, adsp{core.AudioCore().GetADSP()}, mailbox{adsp.GetRenderMailbox()} {} SystemManager::~SystemManager() { Stop(); @@ -32,9 +27,7 @@ bool SystemManager::InitializeUnsafe() { if (!active) { if (adsp.Start()) { active = true; - thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(stop_token); }); - core.CoreTiming().ScheduleLoopingEvent(std::chrono::nanoseconds(0), RENDER_TIME, - thread_event); + thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(); }); } } @@ -45,13 +38,9 @@ void SystemManager::Stop() { if (!active) { return; } - core.CoreTiming().UnscheduleEvent(thread_event, {}); active = false; - { - std::scoped_lock l{cv_mutex}; - do_update = false; - } - thread.request_stop(); + update.store(true); + update.notify_all(); thread.join(); adsp.Stop(); } @@ -96,12 +85,12 @@ bool SystemManager::Remove(System& system_) { return true; } -void SystemManager::ThreadFunc(std::stop_token stop_token) { +void SystemManager::ThreadFunc() { static constexpr char name[]{"AudioRenderSystemManager"}; MicroProfileOnThreadCreate(name); Common::SetCurrentThreadName(name); 
Common::SetCurrentThreadPriority(Common::ThreadPriority::High); - while (active && !stop_token.stop_requested()) { + while (active) { { std::scoped_lock l{mutex1}; @@ -114,20 +103,7 @@ void SystemManager::ThreadFunc(std::stop_token stop_token) { adsp.Signal(); adsp.Wait(); - - std::unique_lock l{cv_mutex}; - Common::CondvarWait(update_cv, l, stop_token, [this]() { return do_update; }); - do_update = false; } } -std::optional SystemManager::ThreadFunc2(s64 time) { - { - std::scoped_lock l{cv_mutex}; - do_update = true; - } - update_cv.notify_all(); - return std::nullopt; -} - } // namespace AudioCore::AudioRenderer diff --git a/src/audio_core/renderer/system_manager.h b/src/audio_core/renderer/system_manager.h index 41bbbdac1..cfaa4053b 100755 --- a/src/audio_core/renderer/system_manager.h +++ b/src/audio_core/renderer/system_manager.h @@ -66,12 +66,13 @@ private: /** * Main thread responsible for command generation. */ - void ThreadFunc(std::stop_token stop_token); + void ThreadFunc(); - /** - * Signalling core timing thread to run ThreadFunc. - */ - std::optional ThreadFunc2(s64 time); + enum class StreamState { + Filling, + Steady, + Draining, + }; /// Core system Core::System& core; @@ -89,12 +90,8 @@ private: ADSP::ADSP& adsp; /// AudioRenderer mailbox for communication ADSP::AudioRenderer_Mailbox* mailbox{}; - /// Core timing event to signal main thread - std::shared_ptr thread_event; /// Atomic for main thread to wait on - std::mutex cv_mutex{}; - bool do_update{}; - std::condition_variable_any update_cv{}; + std::atomic update{}; }; } // namespace AudioCore::AudioRenderer diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index 49ca91efd..d57be6189 100755 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp @@ -268,4 +268,10 @@ u64 SinkStream::GetExpectedPlayedSampleCount() { return std::min(exp_played_sample_count, max_played_sample_count) + TargetSampleCount * 3; } +void SinkStream::WaitFreeSpace() { + std::unique_lock lk{release_mutex}; + release_cv.wait( + lk, [this]() { return queued_buffers < max_queue_size || system.IsShuttingDown(); }); +} + } // namespace AudioCore::Sink diff --git a/src/audio_core/sink/sink_stream.h b/src/audio_core/sink/sink_stream.h index e2f8fb97f..1e7658818 100755 --- a/src/audio_core/sink/sink_stream.h +++ b/src/audio_core/sink/sink_stream.h @@ -207,6 +207,11 @@ public: */ u64 GetExpectedPlayedSampleCount(); + /** + * Waits for free space in the sample ring buffer + */ + void WaitFreeSpace(); + protected: /// Core system Core::System& system; diff --git a/src/input_common/helpers/joycon_protocol/nfc.h b/src/input_common/helpers/joycon_protocol/nfc.h index eddf5932e..4cb992d1d 100755 --- a/src/input_common/helpers/joycon_protocol/nfc.h +++ b/src/input_common/helpers/joycon_protocol/nfc.h @@ -32,6 +32,7 @@ public: bool IsEnabled() const; private: + // Number of times the function will be delayed until it outputs valid data static constexpr std::size_t AMIIBO_UPDATE_DELAY = 15; struct TagFoundData { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 8e83ba329..5b0caf810 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -23,42 +23,94 @@ BufferCache
<P>
::BufferCache(VideoCore::RasterizerInterface& rasterizer_, common_ranges.clear(); inline_buffer_id = NULL_BUFFER_ID; - if (!runtime.CanReportMemoryUsage()) { - minimum_memory = DEFAULT_EXPECTED_MEMORY; - critical_memory = DEFAULT_CRITICAL_MEMORY; - return; - } - const s64 device_memory = static_cast(runtime.GetDeviceLocalMemory()); - const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; - const s64 min_spacing_critical = device_memory - 1_GiB; - const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); - const s64 min_vacancy_expected = (6 * mem_threshold) / 10; - const s64 min_vacancy_critical = (3 * mem_threshold) / 10; - minimum_memory = static_cast( - std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), - DEFAULT_EXPECTED_MEMORY)); - critical_memory = static_cast( - std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), - DEFAULT_CRITICAL_MEMORY)); + const u64 device_mem_per = device_memory / 100; + minimum_memory = device_mem_per * 25; + expected_memory = device_mem_per * 50; + critical_memory = device_mem_per * 80; + LOG_INFO(HW_GPU, "Buffer cache device memory limits: min {} expected {} critical {}", + minimum_memory, expected_memory, critical_memory); } template void BufferCache
<P>
::RunGarbageCollector() { - const bool aggressive_gc = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; - int num_iterations = aggressive_gc ? 64 : 32; - const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + if (total_used_memory < minimum_memory) { + return; + } + bool is_expected = total_used_memory >= expected_memory; + bool is_critical = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = is_critical ? 60ULL : is_expected ? 120ULL : 240ULL; + size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10); + boost::container::small_vector, 40> to_delete; + u64 total_size{0}; + + const auto clean_up = [&](BufferId buffer_id) { if (num_iterations == 0) { return true; } --num_iterations; auto& buffer = slot_buffers[buffer_id]; - DownloadBufferMemory(buffer); - DeleteBuffer(buffer_id); + auto buffer_copies = FullDownloadCopies(buffer, buffer.CpuAddr(), buffer.SizeBytes()); + total_size += buffer_copies.total_size; + to_delete.push_back({buffer_id, std::move(buffer_copies)}); return false; }; lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); + + if (total_size > 0) { + if constexpr (USE_MEMORY_MAPS) { + auto map = runtime.DownloadStagingBuffer(Common::AlignUp(total_size, 1024)); + auto base_offset = map.offset; + + for (auto& [buffer_id, buffer_copies] : to_delete) { + if (buffer_copies.total_size == 0) { + continue; + } + + for (auto& copy : buffer_copies.copies) { + copy.dst_offset += map.offset; + } + + auto& buffer = slot_buffers[buffer_id]; + runtime.CopyBuffer(map.buffer, buffer, buffer_copies.copies); + map.offset += buffer_copies.total_size; + } + + runtime.Finish(); + + for (auto& [buffer_id, buffer_copies] : to_delete) { + if (buffer_copies.total_size > 0) { + auto& buffer = slot_buffers[buffer_id]; + for (const auto& copy : buffer_copies.copies) { + const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; + const u8* copy_mapped_memory = + map.mapped_span.data() + copy.dst_offset - base_offset; + cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); + } + } + DeleteBuffer(buffer_id); + } + } else { + for (auto& [buffer_id, buffer_copies] : to_delete) { + if (buffer_copies.total_size == 0) { + continue; + } + const std::span immediate_buffer = ImmediateBuffer(buffer_copies.total_size); + auto& buffer = slot_buffers[buffer_id]; + for (const BufferCopy& copy : buffer_copies.copies) { + buffer.ImmediateDownload(copy.src_offset, + immediate_buffer.subspan(0, copy.size)); + const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; + cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); + } + DeleteBuffer(buffer_id); + } + } + } else { + for (auto& [buffer_id, buffer_copies] : to_delete) { + DeleteBuffer(buffer_id); + } + } } template @@ -77,12 +129,10 @@ void BufferCache
<P>
::TickFrame() { uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; // If we can obtain the memory info, use it instead of the estimate. - if (runtime.CanReportMemoryUsage()) { + if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) { total_used_memory = runtime.GetDeviceMemoryUsage(); } - if (total_used_memory >= minimum_memory) { - RunGarbageCollector(); - } + RunGarbageCollector(); ++frame_tick; delayed_destruction_ring.Tick(); @@ -1536,17 +1586,13 @@ bool BufferCache
<P>
::InlineMemory(VAddr dest_address, size_t copy_size, } template -void BufferCache
<P>
::DownloadBufferMemory(Buffer& buffer) { - DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); -} - -template -void BufferCache
<P>
::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { - boost::container::small_vector copies; +VideoCommon::BufferCopies BufferCache
<P>
::FullDownloadCopies(Buffer& buffer, VAddr cpu_addr, + u64 size, bool clear) { + boost::container::small_vector copies; u64 total_size_bytes = 0; u64 largest_copy = 0; - memory_tracker.ForEachDownloadRangeAndClear( - cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { + memory_tracker.ForEachDownloadRange( + cpu_addr, size, clear, [&](u64 cpu_addr_out, u64 range_size) { const VAddr buffer_addr = buffer.CpuAddr(); const auto add_download = [&](VAddr start, VAddr end) { const u64 new_offset = start - buffer_addr; @@ -1570,22 +1616,35 @@ void BufferCache
<P>
::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); }); - if (total_size_bytes == 0) { + return {total_size_bytes, largest_copy, std::move(copies)}; +} + +template +void BufferCache
<P>
::DownloadBufferMemory(Buffer& buffer) { + DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); +} + +template +void BufferCache
<P>
::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { + auto buffer_copies = FullDownloadCopies(buffer, cpu_addr, size); + if (buffer_copies.total_size == 0) { return; } + MICROPROFILE_SCOPE(GPU_DownloadMemory); if constexpr (USE_MEMORY_MAPS) { - auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); + auto download_staging = runtime.DownloadStagingBuffer(buffer_copies.total_size); const u8* const mapped_memory = download_staging.mapped_span.data(); - const std::span copies_span(copies.data(), copies.data() + copies.size()); - for (BufferCopy& copy : copies) { + const std::span copies_span(buffer_copies.copies.data(), + buffer_copies.copies.size()); + for (BufferCopy& copy : buffer_copies.copies) { // Modify copies to have the staging offset in mind copy.dst_offset += download_staging.offset; } runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); runtime.Finish(); - for (const BufferCopy& copy : copies) { + for (const BufferCopy& copy : buffer_copies.copies) { const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; // Undo the modified offset const u64 dst_offset = copy.dst_offset - download_staging.offset; @@ -1593,8 +1652,8 @@ void BufferCache
<P>
::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); } } else { - const std::span immediate_buffer = ImmediateBuffer(largest_copy); - for (const BufferCopy& copy : copies) { + const std::span immediate_buffer = ImmediateBuffer(buffer_copies.largest_copy); + for (const BufferCopy& copy : buffer_copies.copies) { buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index ac00d4d9d..ad380e7bb 100755 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -57,8 +57,6 @@ MICROPROFILE_DECLARE(GPU_PrepareBuffers); MICROPROFILE_DECLARE(GPU_BindUploadBuffers); MICROPROFILE_DECLARE(GPU_DownloadMemory); -using BufferId = SlotId; - using VideoCore::Surface::PixelFormat; using namespace Common::Literals; @@ -464,6 +462,9 @@ private: void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span copies); + [[nodiscard]] VideoCommon::BufferCopies FullDownloadCopies(Buffer& buffer, VAddr cpu_addr, + u64 size, bool clear = true); + void DownloadBufferMemory(Buffer& buffer_id); void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); @@ -566,6 +567,7 @@ private: u64 frame_tick = 0; u64 total_used_memory = 0; u64 minimum_memory = 0; + u64 expected_memory = 0; u64 critical_memory = 0; BufferId inline_buffer_id; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8ddd2f4da..816c256e1 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -47,35 +47,31 @@ TextureCache
<P>
::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_image_views.insert(runtime, NullImageViewParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); - if constexpr (HAS_DEVICE_MEMORY_INFO) { - const s64 device_memory = static_cast(runtime.GetDeviceLocalMemory()); - const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; - const s64 min_spacing_critical = device_memory - 1_GiB; - const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); - const s64 min_vacancy_expected = (6 * mem_threshold) / 10; - const s64 min_vacancy_critical = (3 * mem_threshold) / 10; - expected_memory = static_cast( - std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), - DEFAULT_EXPECTED_MEMORY)); - critical_memory = static_cast( - std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), - DEFAULT_CRITICAL_MEMORY)); - minimum_memory = static_cast((device_memory - mem_threshold) / 2); - } else { - expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; - critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = 0; - } + const s64 device_memory = static_cast(runtime.GetDeviceLocalMemory()); + const u64 device_mem_per = device_memory / 100; + minimum_memory = device_mem_per * 25; + expected_memory = device_mem_per * 50; + critical_memory = device_mem_per * 80; + LOG_INFO(HW_GPU, "Texture cache device memory limits: min {} expected {} critical {}", + minimum_memory, expected_memory, critical_memory); } template void TextureCache
<P>
::RunGarbageCollector() { - bool high_priority_mode = total_used_memory >= expected_memory; - bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; - size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); - const auto clean_up = [this, &num_iterations, &high_priority_mode, - &aggressive_mode](ImageId image_id) { + if (total_used_memory < minimum_memory) { + return; + } + bool is_expected = total_used_memory >= expected_memory; + bool is_critical = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = is_critical ? 10ULL : is_expected ? 25ULL : 50ULL; + size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10); + boost::container::small_vector< + std::tuple>, 40> + to_delete; + u64 total_download_size{0}; + u32 largest_download_size{0}; + + const auto clean_up = [&](ImageId image_id) { if (num_iterations == 0) { return true; } @@ -86,51 +82,70 @@ void TextureCache
<P>
::RunGarbageCollector() { // used by the async decoder thread. return false; } - const bool must_download = - image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); - if (!high_priority_mode && - (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) { - return false; - } - if (must_download) { - auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, - swizzle_data_buffer); - } - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id, image.scale_tick > frame_tick + 5); - if (total_used_memory < critical_memory) { - if (aggressive_mode) { - // Sink the aggresiveness. - num_iterations >>= 2; - aggressive_mode = false; - return false; - } - if (high_priority_mode && total_used_memory < expected_memory) { - num_iterations >>= 1; - high_priority_mode = false; - } + + const bool do_download = image.IsSafeDownload() && + False(image.flags & ImageFlagBits::BadOverlap) && + (False(image.flags & ImageFlagBits::CostlyLoad) || is_critical); + if (do_download) { + total_download_size += image.unswizzled_size_bytes; + largest_download_size = std::max(largest_download_size, image.unswizzled_size_bytes); } + to_delete.push_back({image_id, do_download, {}}); return false; }; lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); + + if (total_download_size > 0) { + auto map = runtime.DownloadStagingBuffer(total_download_size); + for (auto& [image_id, do_download, copies] : to_delete) { + if (!do_download) { + continue; + } + Image& image = slot_images[image_id]; + copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + } + + runtime.Finish(); + swizzle_data_buffer.resize_destructive(Common::AlignUp(largest_download_size, 1024)); + + u64 offset{0}; + for (auto& [image_id, do_download, copies] : to_delete) { + Image& image = slot_images[image_id]; + if (do_download) { + for (auto& copy : copies) { + copy.buffer_offset += offset; + } + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, + swizzle_data_buffer); + offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + } + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id, image.scale_tick > frame_tick + 5); + } + } else { + for (auto& [image_id, do_download, copies] : to_delete) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id, image.scale_tick > frame_tick + 5); + } + } } template void TextureCache
<P>
::TickFrame() { // If we can obtain the memory info, use it instead of the estimate. - if (runtime.CanReportMemoryUsage()) { + if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) { total_used_memory = runtime.GetDeviceMemoryUsage(); } - if (total_used_memory > minimum_memory) { - RunGarbageCollector(); - } + RunGarbageCollector(); sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); @@ -1397,6 +1412,27 @@ ImageId TextureCache
<P>
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA return lhs_image.modification_tick < rhs_image.modification_tick; }); + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + new_image.flags |= ImageFlagBits::GpuModified; + const auto& resolution = Settings::values.resolution_info; + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const u32 up_scale = can_rescale ? resolution.up_scale : 1; + const u32 down_shift = can_rescale ? resolution.down_shift : 0; + auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); + if (overlap.info.num_samples != new_image.info.num_samples) { + runtime.CopyImageMSAA(new_image, overlap, std::move(copies)); + } else { + runtime.CopyImage(new_image, overlap, std::move(copies)); + } + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } ImageBase& new_image_base = new_image; for (const ImageId aliased_id : right_aliased_ids) { ImageBase& aliased = slot_images[aliased_id]; @@ -1419,33 +1455,6 @@ ImageId TextureCache
<P>
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA new_image.flags |= ImageFlagBits::BadOverlap; } } - - SynchronizeAliases(new_image_id); - - for (const ImageId overlap_id : overlap_ids) { - Image& overlap = slot_images[overlap_id]; - if (True(overlap.flags & ImageFlagBits::GpuModified) && - overlap.modification_tick > new_image.modification_tick) { - new_image.flags |= ImageFlagBits::GpuModified; - const auto& resolution = Settings::values.resolution_info; - const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const u32 up_scale = can_rescale ? resolution.up_scale : 1; - const u32 down_shift = can_rescale ? resolution.down_shift : 0; - auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); - if (overlap.info.num_samples != new_image.info.num_samples) { - runtime.CopyImageMSAA(new_image, overlap, std::move(copies)); - } else { - runtime.CopyImage(new_image, overlap, std::move(copies)); - } - new_image.modification_tick = overlap.modification_tick; - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - RegisterImage(new_image_id); return new_image_id; } diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 3810b3dae..37bca97c9 100755 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -3,6 +3,8 @@ #pragma once +#include + #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/texture_cache/slot_vector.h" @@ -14,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14; constexpr SlotId CORRUPT_ID{0xfffffffe}; +using BufferId = SlotId; using ImageId = SlotId; using ImageMapId = SlotId; using ImageViewId = SlotId; @@ -146,6 +149,12 @@ struct BufferCopy { size_t size; }; +struct BufferCopies { + u64 total_size; + u64 largest_copy; + boost::container::small_vector copies; +}; + struct SwizzleParameters { Extent3D num_tiles; Extent3D block; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index ca42f6993..ed4da0cae 100755 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -914,7 +914,7 @@ void ConvertImage(std::span input, const ImageInfo& info, std::span FullDownloadCopies(const ImageInfo& info) { +boost::container::small_vector FullDownloadCopies(const ImageInfo& info) { const Extent3D size = info.size; const u32 bytes_per_block = BytesPerBlock(info.format); if (info.type == ImageType::Linear) { @@ -942,7 +942,7 @@ std::vector FullDownloadCopies(const ImageInfo& info) { u32 host_offset = 0; - std::vector copies(num_levels); + boost::container::small_vector copies(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 0e96da4a4..8501a89ee 100755 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -5,6 +5,7 @@ #include #include +#include #include "common/common_types.h" #include "common/scratch_buffer.h" @@ -73,7 +74,8 @@ struct OverlapResult { void ConvertImage(std::span input, const ImageInfo& info, std::span output, std::span copies); -[[nodiscard]] std::vector FullDownloadCopies(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector FullDownloadCopies( + const 
ImageInfo& info);
 
 [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index cfaa85836..319efba9e 100755
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1017,7 +1017,7 @@ void Device::CollectPhysicalMemoryInfo() {
         device_access_memory += mem_properties.memoryHeaps[element].size;
     }
     if (!is_integrated) {
-        const u64 reserve_memory = std::min(device_access_memory / 8, 2_GiB);
+        const u64 reserve_memory = std::min(device_access_memory / 8, 1_GiB);
         device_access_memory -= reserve_memory;
         return;
     }