From 02452680a75484065d85175924be6a763f440aaf Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Fri, 29 Sep 2023 15:30:08 +0200 Subject: [PATCH] early-access version 3905 --- README.md | 2 +- src/common/settings.h | 2 + src/core/CMakeLists.txt | 2 + src/core/hle/service/am/am.cpp | 159 +++++++- src/core/hle/service/am/am.h | 17 + src/core/hle/service/am/applets/applets.cpp | 16 + src/core/hle/service/am/applets/applets.h | 15 +- src/core/hle/service/ldn/ldn.cpp | 91 ++++- src/core/hle/service/nfc/common/device.cpp | 9 +- src/core/hle/service/nvdrv/devices/nvmap.h | 14 +- src/core/hle/service/nvnflinger/buffer_item.h | 2 +- src/core/hle/service/nvnflinger/buffer_slot.h | 2 +- .../nvnflinger/fb_share_buffer_manager.cpp | 351 ++++++++++++++++++ .../nvnflinger/fb_share_buffer_manager.h | 65 ++++ .../nvnflinger/graphic_buffer_producer.h | 2 +- .../hle/service/nvnflinger/nvnflinger.cpp | 11 + src/core/hle/service/nvnflinger/nvnflinger.h | 9 + src/core/hle/service/nvnflinger/ui/fence.h | 3 + .../service/nvnflinger/ui/graphic_buffer.h | 4 +- src/core/hle/service/vi/vi.cpp | 129 ++++++- src/video_core/CMakeLists.txt | 1 + src/video_core/buffer_cache/buffer_cache.h | 97 ++--- .../buffer_cache/buffer_cache_base.h | 4 - src/video_core/buffer_cache/usage_tracker.h | 79 ++++ .../renderer_opengl/gl_buffer_cache.cpp | 7 +- .../renderer_opengl/gl_buffer_cache.h | 17 +- src/video_core/renderer_vulkan/blit_image.cpp | 51 +-- .../renderer_vulkan/renderer_vulkan.cpp | 2 +- .../renderer_vulkan/vk_blit_screen.cpp | 75 ++-- .../renderer_vulkan/vk_buffer_cache.cpp | 126 ++++--- .../renderer_vulkan/vk_buffer_cache.h | 21 +- .../renderer_vulkan/vk_compute_pass.cpp | 49 +-- .../renderer_vulkan/vk_compute_pipeline.cpp | 6 +- src/video_core/renderer_vulkan/vk_fsr.cpp | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 73 ++-- .../renderer_vulkan/vk_master_semaphore.cpp | 29 +- .../renderer_vulkan/vk_master_semaphore.h | 14 +- .../renderer_vulkan/vk_present_manager.cpp | 4 +- .../renderer_vulkan/vk_query_cache.cpp | 74 ++-- .../renderer_vulkan/vk_rasterizer.cpp | 166 +++++---- .../renderer_vulkan/vk_scheduler.cpp | 55 +-- src/video_core/renderer_vulkan/vk_scheduler.h | 9 +- src/video_core/renderer_vulkan/vk_smaa.cpp | 6 +- .../renderer_vulkan/vk_staging_buffer_pool.h | 4 + .../renderer_vulkan/vk_texture_cache.cpp | 14 +- src/video_core/texture_cache/slot_vector.h | 4 + src/video_core/vulkan_common/vulkan_wrapper.h | 4 + src/yuzu/configuration/configure_debug.cpp | 3 + src/yuzu/configuration/configure_debug.ui | 140 +++---- src/yuzu/main.cpp | 63 +++- src/yuzu/main.h | 9 + src/yuzu/main.ui | 30 ++ 52 files changed, 1626 insertions(+), 518 deletions(-) create mode 100755 src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp create mode 100755 src/core/hle/service/nvnflinger/fb_share_buffer_manager.h create mode 100755 src/video_core/buffer_cache/usage_tracker.h diff --git a/README.md b/README.md index 47bdaa197..bc5cb4317 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3904. +This is the source code for early-access 3905. ## Legal Notice diff --git a/src/common/settings.h b/src/common/settings.h index 8d89d34f7..c5db43b5a 100755 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -353,6 +353,8 @@ struct Values { Category::RendererDebug}; // TODO: remove this once AMDVLK supports VK_EXT_depth_bias_control bool renderer_amdvlk_depth_bias_workaround{}; + Setting disable_buffer_reorder{linkage, false, "disable_buffer_reorder", + Category::RendererDebug}; // System SwitchableSetting language_index{linkage, diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index d91c6d0ec..5c9a97ddc 100755 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -698,6 +698,8 @@ add_library(core STATIC hle/service/nvnflinger/consumer_base.cpp hle/service/nvnflinger/consumer_base.h hle/service/nvnflinger/consumer_listener.h + hle/service/nvnflinger/fb_share_buffer_manager.cpp + hle/service/nvnflinger/fb_share_buffer_manager.h hle/service/nvnflinger/graphic_buffer_producer.cpp hle/service/nvnflinger/graphic_buffer_producer.h hle/service/nvnflinger/hos_binder_driver_server.cpp diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 807ae5c0f..6d26f0a14 100755 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -19,6 +19,7 @@ #include "core/hle/service/am/am.h" #include "core/hle/service/am/applet_ae.h" #include "core/hle/service/am/applet_oe.h" +#include "core/hle/service/am/applets/applet_cabinet.h" #include "core/hle/service/am/applets/applet_mii_edit_types.h" #include "core/hle/service/am/applets/applet_profile_select.h" #include "core/hle/service/am/applets/applet_web_browser.h" @@ -33,11 +34,13 @@ #include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/ipc_helpers.h" #include "core/hle/service/ns/ns.h" +#include "core/hle/service/nvnflinger/fb_share_buffer_manager.h" #include "core/hle/service/nvnflinger/nvnflinger.h" #include "core/hle/service/pm/pm.h" #include "core/hle/service/server_manager.h" #include "core/hle/service/sm/sm.h" #include "core/hle/service/vi/vi.h" +#include "core/hle/service/vi/vi_results.h" #include "core/memory.h" namespace Service::AM { @@ -190,7 +193,7 @@ IDisplayController::IDisplayController(Core::System& system_) {4, nullptr, "UpdateCallerAppletCaptureImage"}, {5, nullptr, "GetLastForegroundCaptureImageEx"}, {6, nullptr, "GetLastApplicationCaptureImageEx"}, - {7, nullptr, "GetCallerAppletCaptureImageEx"}, + {7, &IDisplayController::GetCallerAppletCaptureImageEx, "GetCallerAppletCaptureImageEx"}, {8, &IDisplayController::TakeScreenShotOfOwnLayer, "TakeScreenShotOfOwnLayer"}, {9, nullptr, "CopyBetweenCaptureBuffers"}, {10, nullptr, "AcquireLastApplicationCaptureBuffer"}, @@ -208,8 +211,8 @@ IDisplayController::IDisplayController(Core::System& system_) {23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"}, {24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"}, {25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"}, - {26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"}, - {27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"}, + {26, &IDisplayController::AcquireCallerAppletCaptureSharedBuffer, "AcquireCallerAppletCaptureSharedBuffer"}, + {27, &IDisplayController::ReleaseCallerAppletCaptureSharedBuffer, "ReleaseCallerAppletCaptureSharedBuffer"}, {28, nullptr, "TakeScreenShotOfOwnLayerEx"}, }; // clang-format on @@ -219,6 +222,15 @@ IDisplayController::IDisplayController(Core::System& system_) IDisplayController::~IDisplayController() = default; +void IDisplayController::GetCallerAppletCaptureImageEx(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(ResultSuccess); + rb.Push(1u); + rb.Push(0); +} + void IDisplayController::TakeScreenShotOfOwnLayer(HLERequestContext& ctx) { LOG_WARNING(Service_AM, "(STUBBED) called"); @@ -226,6 +238,22 @@ void IDisplayController::TakeScreenShotOfOwnLayer(HLERequestContext& ctx) { rb.Push(ResultSuccess); } +void IDisplayController::AcquireCallerAppletCaptureSharedBuffer(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(ResultSuccess); + rb.Push(1U); + rb.Push(0); +} + +void IDisplayController::ReleaseCallerAppletCaptureSharedBuffer(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + IDebugFunctions::IDebugFunctions(Core::System& system_) : ServiceFramework{system_, "IDebugFunctions"} { // clang-format off @@ -285,14 +313,14 @@ ISelfController::ISelfController(Core::System& system_, Nvnflinger::Nvnflinger& {20, nullptr, "SetDesirableKeyboardLayout"}, {21, nullptr, "GetScreenShotProgramId"}, {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"}, - {41, nullptr, "IsSystemBufferSharingEnabled"}, - {42, nullptr, "GetSystemSharedLayerHandle"}, - {43, nullptr, "GetSystemSharedBufferHandle"}, + {41, &ISelfController::IsSystemBufferSharingEnabled, "IsSystemBufferSharingEnabled"}, + {42, &ISelfController::GetSystemSharedLayerHandle, "GetSystemSharedLayerHandle"}, + {43, &ISelfController::GetSystemSharedBufferHandle, "GetSystemSharedBufferHandle"}, {44, &ISelfController::CreateManagedDisplaySeparableLayer, "CreateManagedDisplaySeparableLayer"}, {45, nullptr, "SetManagedDisplayLayerSeparationMode"}, {46, nullptr, "SetRecordingLayerCompositionEnabled"}, {50, &ISelfController::SetHandlesRequestToDisplay, "SetHandlesRequestToDisplay"}, - {51, nullptr, "ApproveToDisplay"}, + {51, &ISelfController::ApproveToDisplay, "ApproveToDisplay"}, {60, nullptr, "OverrideAutoSleepTimeAndDimmingTime"}, {61, nullptr, "SetMediaPlaybackState"}, {62, &ISelfController::SetIdleTimeDetectionExtension, "SetIdleTimeDetectionExtension"}, @@ -491,6 +519,44 @@ void ISelfController::CreateManagedDisplayLayer(HLERequestContext& ctx) { rb.Push(*layer_id); } +void ISelfController::IsSystemBufferSharingEnabled(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(this->EnsureBufferSharingEnabled()); +} + +void ISelfController::GetSystemSharedLayerHandle(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 6}; + rb.Push(this->EnsureBufferSharingEnabled()); + rb.Push(system_shared_buffer_id); + rb.Push(system_shared_layer_id); +} + +void ISelfController::GetSystemSharedBufferHandle(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(this->EnsureBufferSharingEnabled()); + rb.Push(system_shared_buffer_id); +} + +Result ISelfController::EnsureBufferSharingEnabled() { + R_SUCCEED_IF(buffer_sharing_enabled); + R_UNLESS(system.GetAppletManager().GetCurrentAppletId() > Applets::AppletId::Application, + VI::ResultOperationFailed); + + ON_RESULT_SUCCESS { + buffer_sharing_enabled = true; + }; + + const auto display_id = nvnflinger.OpenDisplay("Default"); + R_RETURN(nvnflinger.GetSystemBufferManager().Initialize(&system_shared_buffer_id, + &system_shared_layer_id, *display_id)); +} + void ISelfController::CreateManagedDisplaySeparableLayer(HLERequestContext& ctx) { LOG_WARNING(Service_AM, "(STUBBED) called"); @@ -516,6 +582,13 @@ void ISelfController::SetHandlesRequestToDisplay(HLERequestContext& ctx) { rb.Push(ResultSuccess); } +void ISelfController::ApproveToDisplay(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + void ISelfController::SetIdleTimeDetectionExtension(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; idle_time_detection_extension = rp.Pop(); @@ -686,7 +759,8 @@ void AppletMessageQueue::OperationModeChanged() { ICommonStateGetter::ICommonStateGetter(Core::System& system_, std::shared_ptr msg_queue_) - : ServiceFramework{system_, "ICommonStateGetter"}, msg_queue{std::move(msg_queue_)} { + : ServiceFramework{system_, "ICommonStateGetter"}, msg_queue{std::move(msg_queue_)}, + service_context{system_, "ICommonStateGetter"} { // clang-format off static const FunctionInfo functions[] = { {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"}, @@ -699,10 +773,10 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system_, {7, nullptr, "GetCradleStatus"}, {8, &ICommonStateGetter::GetBootMode, "GetBootMode"}, {9, &ICommonStateGetter::GetCurrentFocusState, "GetCurrentFocusState"}, - {10, nullptr, "RequestToAcquireSleepLock"}, + {10, &ICommonStateGetter::RequestToAcquireSleepLock, "RequestToAcquireSleepLock"}, {11, nullptr, "ReleaseSleepLock"}, {12, nullptr, "ReleaseSleepLockTransiently"}, - {13, nullptr, "GetAcquiredSleepLockEvent"}, + {13, &ICommonStateGetter::GetAcquiredSleepLockEvent, "GetAcquiredSleepLockEvent"}, {14, nullptr, "GetWakeupCount"}, {20, nullptr, "PushToGeneralChannel"}, {30, nullptr, "GetHomeButtonReaderLockAccessor"}, @@ -745,6 +819,8 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system_, RegisterHandlers(functions); + sleep_lock_event = service_context.CreateEvent("ICommonStateGetter::SleepLockEvent"); + // Configure applets to be in foreground state msg_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged); msg_queue->PushMessage(AppletMessageQueue::AppletMessage::ChangeIntoForeground); @@ -793,6 +869,24 @@ void ICommonStateGetter::GetCurrentFocusState(HLERequestContext& ctx) { rb.Push(static_cast(FocusState::InFocus)); } +void ICommonStateGetter::RequestToAcquireSleepLock(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + + // Sleep lock is acquired immediately. + sleep_lock_event->Signal(); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + +void ICommonStateGetter::GetAcquiredSleepLockEvent(HLERequestContext& ctx) { + LOG_WARNING(Service_AM, "called"); + + IPC::ResponseBuilder rb{ctx, 2, 1}; + rb.Push(ResultSuccess); + rb.PushCopyObjects(sleep_lock_event->GetReadableEvent()); +} + void ICommonStateGetter::IsVrModeEnabled(HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); @@ -1385,7 +1479,16 @@ ILibraryAppletSelfAccessor::ILibraryAppletSelfAccessor(Core::System& system_) // clang-format on RegisterHandlers(functions); - PushInShowMiiEditData(); + switch (system.GetAppletManager().GetCurrentAppletId()) { + case Applets::AppletId::Cabinet: + PushInShowCabinetData(); + break; + case Applets::AppletId::MiiEdit: + PushInShowMiiEditData(); + break; + default: + break; + } } ILibraryAppletSelfAccessor::~ILibraryAppletSelfAccessor() = default; @@ -1431,7 +1534,7 @@ void ILibraryAppletSelfAccessor::GetLibraryAppletInfo(HLERequestContext& ctx) { LOG_WARNING(Service_AM, "(STUBBED) called"); const LibraryAppletInfo applet_info{ - .applet_id = Applets::AppletId::MiiEdit, + .applet_id = system.GetAppletManager().GetCurrentAppletId(), .library_applet_mode = Applets::LibraryAppletMode::AllForeground, }; @@ -1459,6 +1562,35 @@ void ILibraryAppletSelfAccessor::GetCallerAppletIdentityInfo(HLERequestContext& rb.PushRaw(applet_info); } +void ILibraryAppletSelfAccessor::PushInShowCabinetData() { + constexpr Applets::Applet::CommonArguments arguments{ + .arguments_version = 3, + .size = 0x20, + .library_version = 1, + .theme_color = 3, + .play_startup_sound = true, + .system_tick = 0, + }; + + const Applets::StartParamForAmiiboSettings amiibo_settings{ + .param_1 = 0, + .applet_mode = system.GetAppletManager().GetCabinetMode(), + .flags = 0, + .amiibo_settings_1 = 0, + .device_handle = 0, + .tag_info{}, + .register_info{}, + .amiibo_settings_3{}, + }; + + std::vector argument_data(sizeof(arguments)); + std::vector settings_data(sizeof(amiibo_settings)); + std::memcpy(argument_data.data(), &arguments, sizeof(arguments)); + std::memcpy(settings_data.data(), &amiibo_settings, sizeof(amiibo_settings)); + queue_data.emplace_back(std::move(argument_data)); + queue_data.emplace_back(std::move(settings_data)); +} + void ILibraryAppletSelfAccessor::PushInShowMiiEditData() { struct MiiEditV3 { Applets::MiiEditAppletInputCommon common; @@ -2235,7 +2367,7 @@ void IProcessWindingController::GetLaunchReason(HLERequestContext& ctx) { } void IProcessWindingController::OpenCallingLibraryApplet(HLERequestContext& ctx) { - const auto applet_id = Applets::AppletId::MiiEdit; + const auto applet_id = system.GetAppletManager().GetCurrentAppletId(); const auto applet_mode = Applets::LibraryAppletMode::AllForeground; LOG_WARNING(Service_AM, "(STUBBED) called with applet_id={:08X}, applet_mode={:08X}", applet_id, @@ -2256,4 +2388,5 @@ void IProcessWindingController::OpenCallingLibraryApplet(HLERequestContext& ctx) rb.Push(ResultSuccess); rb.PushIpcInterface(system, applet); } + } // namespace Service::AM diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index b6d2b09bc..f0804dd44 100755 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -122,7 +122,10 @@ public: ~IDisplayController() override; private: + void GetCallerAppletCaptureImageEx(HLERequestContext& ctx); void TakeScreenShotOfOwnLayer(HLERequestContext& ctx); + void AcquireCallerAppletCaptureSharedBuffer(HLERequestContext& ctx); + void ReleaseCallerAppletCaptureSharedBuffer(HLERequestContext& ctx); }; class IDebugFunctions final : public ServiceFramework { @@ -150,9 +153,13 @@ private: void SetRestartMessageEnabled(HLERequestContext& ctx); void SetOutOfFocusSuspendingEnabled(HLERequestContext& ctx); void SetAlbumImageOrientation(HLERequestContext& ctx); + void IsSystemBufferSharingEnabled(HLERequestContext& ctx); + void GetSystemSharedBufferHandle(HLERequestContext& ctx); + void GetSystemSharedLayerHandle(HLERequestContext& ctx); void CreateManagedDisplayLayer(HLERequestContext& ctx); void CreateManagedDisplaySeparableLayer(HLERequestContext& ctx); void SetHandlesRequestToDisplay(HLERequestContext& ctx); + void ApproveToDisplay(HLERequestContext& ctx); void SetIdleTimeDetectionExtension(HLERequestContext& ctx); void GetIdleTimeDetectionExtension(HLERequestContext& ctx); void ReportUserIsActive(HLERequestContext& ctx); @@ -164,6 +171,8 @@ private: void SaveCurrentScreenshot(HLERequestContext& ctx); void SetRecordVolumeMuted(HLERequestContext& ctx); + Result EnsureBufferSharingEnabled(); + enum class ScreenshotPermission : u32 { Inherit = 0, Enable = 1, @@ -179,7 +188,10 @@ private: u32 idle_time_detection_extension = 0; u64 num_fatal_sections_entered = 0; + u64 system_shared_buffer_id = 0; + u64 system_shared_layer_id = 0; bool is_auto_sleep_disabled = false; + bool buffer_sharing_enabled = false; ScreenshotPermission screenshot_permission = ScreenshotPermission::Inherit; }; @@ -223,6 +235,8 @@ private: void GetEventHandle(HLERequestContext& ctx); void ReceiveMessage(HLERequestContext& ctx); void GetCurrentFocusState(HLERequestContext& ctx); + void RequestToAcquireSleepLock(HLERequestContext& ctx); + void GetAcquiredSleepLockEvent(HLERequestContext& ctx); void GetDefaultDisplayResolutionChangeEvent(HLERequestContext& ctx); void GetOperationMode(HLERequestContext& ctx); void GetPerformanceMode(HLERequestContext& ctx); @@ -240,6 +254,8 @@ private: std::shared_ptr msg_queue; bool vr_mode_state{}; + Kernel::KEvent* sleep_lock_event; + KernelHelpers::ServiceContext service_context; }; class IStorageImpl { @@ -311,6 +327,7 @@ private: void ExitProcessAndReturn(HLERequestContext& ctx); void GetCallerAppletIdentityInfo(HLERequestContext& ctx); + void PushInShowCabinetData(); void PushInShowMiiEditData(); std::deque> queue_data; diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp index fa66c9a60..71b711cd5 100755 --- a/src/core/hle/service/am/applets/applets.cpp +++ b/src/core/hle/service/am/applets/applets.cpp @@ -199,6 +199,14 @@ const AppletFrontendSet& AppletManager::GetAppletFrontendSet() const { return frontend; } +NFP::CabinetMode AppletManager::GetCabinetMode() const { + return cabinet_mode; +} + +AppletId AppletManager::GetCurrentAppletId() const { + return current_applet_id; +} + void AppletManager::SetAppletFrontendSet(AppletFrontendSet set) { if (set.cabinet != nullptr) { frontend.cabinet = std::move(set.cabinet); @@ -237,6 +245,14 @@ void AppletManager::SetAppletFrontendSet(AppletFrontendSet set) { } } +void AppletManager::SetCabinetMode(NFP::CabinetMode mode) { + cabinet_mode = mode; +} + +void AppletManager::SetCurrentAppletId(AppletId applet_id) { + current_applet_id = applet_id; +} + void AppletManager::SetDefaultAppletFrontendSet() { ClearAll(); SetDefaultAppletsIfMissing(); diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h index 42d0d8cf1..0b36995b3 100755 --- a/src/core/hle/service/am/applets/applets.h +++ b/src/core/hle/service/am/applets/applets.h @@ -34,6 +34,10 @@ class KEvent; class KReadableEvent; } // namespace Kernel +namespace Service::NFP { +enum class CabinetMode : u8; +} // namespace Service::NFP + namespace Service::AM { class IStorage; @@ -41,6 +45,8 @@ class IStorage; namespace Applets { enum class AppletId : u32 { + None = 0x00, + Application = 0x01, OverlayDisplay = 0x02, QLaunch = 0x03, Starter = 0x04, @@ -160,7 +166,6 @@ public: return initialized; } -protected: struct CommonArguments { u32_le arguments_version; u32_le size; @@ -171,6 +176,7 @@ protected: }; static_assert(sizeof(CommonArguments) == 0x20, "CommonArguments has incorrect size."); +protected: CommonArguments common_args{}; AppletDataBroker broker; LibraryAppletMode applet_mode; @@ -219,8 +225,12 @@ public: ~AppletManager(); const AppletFrontendSet& GetAppletFrontendSet() const; + NFP::CabinetMode GetCabinetMode() const; + AppletId GetCurrentAppletId() const; void SetAppletFrontendSet(AppletFrontendSet set); + void SetCabinetMode(NFP::CabinetMode mode); + void SetCurrentAppletId(AppletId applet_id); void SetDefaultAppletFrontendSet(); void SetDefaultAppletsIfMissing(); void ClearAll(); @@ -228,6 +238,9 @@ public: std::shared_ptr GetApplet(AppletId id, LibraryAppletMode mode) const; private: + AppletId current_applet_id{}; + NFP::CabinetMode cabinet_mode{}; + AppletFrontendSet frontend; Core::System& system; }; diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index 702a1dd29..c12850c60 100755 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -23,19 +23,39 @@ public: explicit IMonitorService(Core::System& system_) : ServiceFramework{system_, "IMonitorService"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "GetStateForMonitor"}, + {0, &IMonitorService::GetStateForMonitor, "GetStateForMonitor"}, {1, nullptr, "GetNetworkInfoForMonitor"}, {2, nullptr, "GetIpv4AddressForMonitor"}, {3, nullptr, "GetDisconnectReasonForMonitor"}, {4, nullptr, "GetSecurityParameterForMonitor"}, {5, nullptr, "GetNetworkConfigForMonitor"}, - {100, nullptr, "InitializeMonitor"}, + {100, &IMonitorService::InitializeMonitor, "InitializeMonitor"}, {101, nullptr, "FinalizeMonitor"}, }; // clang-format on RegisterHandlers(functions); } + +private: + void GetStateForMonitor(HLERequestContext& ctx) { + LOG_INFO(Service_LDN, "called"); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.PushEnum(state); + } + + void InitializeMonitor(HLERequestContext& ctx) { + LOG_INFO(Service_LDN, "called"); + + state = State::Initialized; + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } + + State state{State::None}; }; class LDNM final : public ServiceFramework { @@ -731,14 +751,81 @@ public: } }; +class ISfMonitorService final : public ServiceFramework { +public: + explicit ISfMonitorService(Core::System& system_) + : ServiceFramework{system_, "ISfMonitorService"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, &ISfMonitorService::Initialize, "Initialize"}, + {288, &ISfMonitorService::GetGroupInfo, "GetGroupInfo"}, + {320, nullptr, "GetLinkLevel"}, + }; + // clang-format on + + RegisterHandlers(functions); + } + +private: + void Initialize(HLERequestContext& ctx) { + LOG_WARNING(Service_LDN, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push(0); + } + + void GetGroupInfo(HLERequestContext& ctx) { + LOG_WARNING(Service_LDN, "(STUBBED) called"); + + struct GroupInfo { + std::array info; + }; + + GroupInfo group_info{}; + + ctx.WriteBuffer(group_info); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } +}; + +class LP2PM final : public ServiceFramework { +public: + explicit LP2PM(Core::System& system_) : ServiceFramework{system_, "lp2p:m"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, &LP2PM::CreateMonitorService, "CreateMonitorService"}, + }; + // clang-format on + + RegisterHandlers(functions); + } + +private: + void CreateMonitorService(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const u64 reserved_input = rp.Pop(); + + LOG_INFO(Service_LDN, "called, reserved_input={}", reserved_input); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(ResultSuccess); + rb.PushIpcInterface(system); + } +}; + void LoopProcess(Core::System& system) { auto server_manager = std::make_unique(system); server_manager->RegisterNamedService("ldn:m", std::make_shared(system)); server_manager->RegisterNamedService("ldn:s", std::make_shared(system)); server_manager->RegisterNamedService("ldn:u", std::make_shared(system)); + server_manager->RegisterNamedService("lp2p:app", std::make_shared(system)); server_manager->RegisterNamedService("lp2p:sys", std::make_shared(system)); + server_manager->RegisterNamedService("lp2p:m", std::make_shared(system)); + ServerManager::RunServer(std::move(server_manager)); } diff --git a/src/core/hle/service/nfc/common/device.cpp b/src/core/hle/service/nfc/common/device.cpp index 68c407f81..e7a00deb3 100755 --- a/src/core/hle/service/nfc/common/device.cpp +++ b/src/core/hle/service/nfc/common/device.cpp @@ -830,11 +830,6 @@ Result NfcDevice::SetRegisterInfoPrivate(const NFP::RegisterInfoPrivate& registe return ResultWrongDeviceState; } - Service::Mii::StoreData store_data{}; - Service::Mii::NfpStoreDataExtension extension{}; - store_data.BuildBase(Mii::Gender::Male); - extension.SetFromStoreData(store_data); - auto& settings = tag_data.settings; if (tag_data.settings.settings.amiibo_initialized == 0) { @@ -843,8 +838,8 @@ Result NfcDevice::SetRegisterInfoPrivate(const NFP::RegisterInfoPrivate& registe } SetAmiiboName(settings, register_info.amiibo_name); - tag_data.owner_mii.BuildFromStoreData(store_data); - tag_data.mii_extension = extension; + tag_data.owner_mii.BuildFromStoreData(register_info.mii_store_data); + tag_data.mii_extension.SetFromStoreData(register_info.mii_store_data); tag_data.unknown = 0; tag_data.unknown2 = {}; settings.country_code_id = 0; diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index e1f5c947d..e42122a11 100755 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -45,13 +45,6 @@ public: IsSharedMemMapped = 6 }; -private: - /// Id to use for the next handle that is created. - u32 next_handle = 0; - - /// Id to use for the next object that is created. - u32 next_id = 0; - struct IocCreateParams { // Input u32_le size{}; @@ -113,6 +106,13 @@ private: NvResult IocParam(std::span input, std::span output); NvResult IocFree(std::span input, std::span output); +private: + /// Id to use for the next handle that is created. + u32 next_handle = 0; + + /// Id to use for the next object that is created. + u32 next_id = 0; + NvCore::Container& container; NvCore::NvMap& file; }; diff --git a/src/core/hle/service/nvnflinger/buffer_item.h b/src/core/hle/service/nvnflinger/buffer_item.h index 7fd808f54..3da8cc3aa 100755 --- a/src/core/hle/service/nvnflinger/buffer_item.h +++ b/src/core/hle/service/nvnflinger/buffer_item.h @@ -15,7 +15,7 @@ namespace Service::android { -class GraphicBuffer; +struct GraphicBuffer; class BufferItem final { public: diff --git a/src/core/hle/service/nvnflinger/buffer_slot.h b/src/core/hle/service/nvnflinger/buffer_slot.h index d25bca049..d8c9dec3b 100755 --- a/src/core/hle/service/nvnflinger/buffer_slot.h +++ b/src/core/hle/service/nvnflinger/buffer_slot.h @@ -13,7 +13,7 @@ namespace Service::android { -class GraphicBuffer; +struct GraphicBuffer; enum class BufferState : u32 { Free = 0, diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp new file mode 100755 index 000000000..469a53244 --- /dev/null +++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp @@ -0,0 +1,351 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "core/core.h" +#include "core/hle/kernel/k_process.h" +#include "core/hle/kernel/k_system_resource.h" +#include "core/hle/service/nvdrv/devices/nvmap.h" +#include "core/hle/service/nvdrv/nvdrv.h" +#include "core/hle/service/nvnflinger/buffer_queue_producer.h" +#include "core/hle/service/nvnflinger/fb_share_buffer_manager.h" +#include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/ui/graphic_buffer.h" +#include "core/hle/service/vi/layer/vi_layer.h" +#include "core/hle/service/vi/vi_results.h" + +namespace Service::Nvnflinger { + +namespace { + +Result AllocateIoForProcessAddressSpace(Common::ProcessAddress* out_map_address, + std::unique_ptr* out_page_group, + Core::System& system, u32 size) { + using Core::Memory::YUZU_PAGESIZE; + + // Allocate memory for the system shared buffer. + // FIXME: Because the gmmu can only point to cpu addresses, we need + // to map this in the application space to allow it to be used. + // FIXME: Add proper smmu emulation. + // FIXME: This memory belongs to vi's .data section. + auto& kernel = system.Kernel(); + auto* process = system.ApplicationProcess(); + auto& page_table = process->GetPageTable(); + + // Hold a temporary page group reference while we try to map it. + auto pg = std::make_unique( + kernel, std::addressof(kernel.GetSystemSystemResource().GetBlockInfoManager())); + + // Allocate memory from secure pool. + R_TRY(kernel.MemoryManager().AllocateAndOpen( + pg.get(), size / YUZU_PAGESIZE, + Kernel::KMemoryManager::EncodeOption(Kernel::KMemoryManager::Pool::Secure, + Kernel::KMemoryManager::Direction::FromBack))); + + // Get bounds of where mapping is possible. + const VAddr alias_code_begin = GetInteger(page_table.GetAliasCodeRegionStart()); + const VAddr alias_code_size = page_table.GetAliasCodeRegionSize() / YUZU_PAGESIZE; + const auto state = Kernel::KMemoryState::Io; + const auto perm = Kernel::KMemoryPermission::UserReadWrite; + std::mt19937_64 rng{process->GetRandomEntropy(0)}; + + // Retry up to 64 times to map into alias code range. + Result res = ResultSuccess; + int i; + for (i = 0; i < 64; i++) { + *out_map_address = alias_code_begin + ((rng() % alias_code_size) * YUZU_PAGESIZE); + res = page_table.MapPageGroup(*out_map_address, *pg, state, perm); + if (R_SUCCEEDED(res)) { + break; + } + } + + // Return failure, if necessary + R_UNLESS(i < 64, res); + + // Return the mapped page group. + *out_page_group = std::move(pg); + + // We succeeded. + R_SUCCEED(); +} + +template +std::span SerializeIoc(T& params) { + return std::span(reinterpret_cast(std::addressof(params)), sizeof(T)); +} + +Result CreateNvMapHandle(u32* out_nv_map_handle, Nvidia::Devices::nvmap& nvmap, u32 size) { + // Create a handle. + Nvidia::Devices::nvmap::IocCreateParams create_in_params{ + .size = size, + .handle = 0, + }; + Nvidia::Devices::nvmap::IocCreateParams create_out_params{}; + R_UNLESS(nvmap.IocCreate(SerializeIoc(create_in_params), SerializeIoc(create_out_params)) == + Nvidia::NvResult::Success, + VI::ResultOperationFailed); + + // Assign the output handle. + *out_nv_map_handle = create_out_params.handle; + + // We succeeded. + R_SUCCEED(); +} + +Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle) { + // Free the handle. + Nvidia::Devices::nvmap::IocFreeParams free_in_params{ + .handle = handle, + }; + Nvidia::Devices::nvmap::IocFreeParams free_out_params{}; + R_UNLESS(nvmap.IocFree(SerializeIoc(free_in_params), SerializeIoc(free_out_params)) == + Nvidia::NvResult::Success, + VI::ResultOperationFailed); + + // We succeeded. + R_SUCCEED(); +} + +Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::ProcessAddress buffer, + u32 size) { + // Assign the allocated memory to the handle. + Nvidia::Devices::nvmap::IocAllocParams alloc_in_params{ + .handle = handle, + .heap_mask = 0, + .flags = {}, + .align = 0, + .kind = 0, + .address = GetInteger(buffer), + }; + Nvidia::Devices::nvmap::IocAllocParams alloc_out_params{}; + R_UNLESS(nvmap.IocAlloc(SerializeIoc(alloc_in_params), SerializeIoc(alloc_out_params)) == + Nvidia::NvResult::Success, + VI::ResultOperationFailed); + + // We succeeded. + R_SUCCEED(); +} + +Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, + Common::ProcessAddress buffer, u32 size) { + // Get the nvmap device. + auto nvmap_fd = nvdrv.Open("/dev/nvmap"); + auto nvmap = nvdrv.GetDevice(nvmap_fd); + ASSERT(nvmap != nullptr); + + // Create a handle. + R_TRY(CreateNvMapHandle(out_handle, *nvmap, size)); + + // Ensure we maintain a clean state on failure. + ON_RESULT_FAILURE { + ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle))); + }; + + // Assign the allocated memory to the handle. + R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size)); +} + +constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888; +constexpr u32 SharedBufferBlockLinearBpp = 4; + +constexpr u32 SharedBufferBlockLinearWidth = 1280; +constexpr u32 SharedBufferBlockLinearHeight = 768; +constexpr u32 SharedBufferBlockLinearStride = + SharedBufferBlockLinearWidth * SharedBufferBlockLinearBpp; +constexpr u32 SharedBufferNumSlots = 7; + +constexpr u32 SharedBufferWidth = 1280; +constexpr u32 SharedBufferHeight = 720; +constexpr u32 SharedBufferAsync = false; + +constexpr u32 SharedBufferSlotSize = + SharedBufferBlockLinearWidth * SharedBufferBlockLinearHeight * SharedBufferBlockLinearBpp; +constexpr u32 SharedBufferSize = SharedBufferSlotSize * SharedBufferNumSlots; + +constexpr SharedMemoryPoolLayout SharedBufferPoolLayout = [] { + SharedMemoryPoolLayout layout{}; + layout.num_slots = SharedBufferNumSlots; + + for (u32 i = 0; i < SharedBufferNumSlots; i++) { + layout.slots[i].buffer_offset = i * SharedBufferSlotSize; + layout.slots[i].size = SharedBufferSlotSize; + layout.slots[i].width = SharedBufferWidth; + layout.slots[i].height = SharedBufferHeight; + } + + return layout; +}(); + +void MakeGraphicBuffer(android::BufferQueueProducer& producer, u32 slot, u32 handle) { + auto buffer = std::make_shared(); + buffer->width = SharedBufferWidth; + buffer->height = SharedBufferHeight; + buffer->stride = SharedBufferBlockLinearStride; + buffer->format = SharedBufferBlockLinearFormat; + buffer->buffer_id = handle; + buffer->offset = slot * SharedBufferSlotSize; + ASSERT(producer.SetPreallocatedBuffer(slot, buffer) == android::Status::NoError); +} + +} // namespace + +FbShareBufferManager::FbShareBufferManager(Core::System& system, Nvnflinger& flinger, + std::shared_ptr nvdrv) + : m_system(system), m_flinger(flinger), m_nvdrv(std::move(nvdrv)) {} + +FbShareBufferManager::~FbShareBufferManager() = default; + +Result FbShareBufferManager::Initialize(u64* out_buffer_id, u64* out_layer_id, u64 display_id) { + std::scoped_lock lk{m_guard}; + + // Ensure we have not already created a buffer. + R_UNLESS(m_buffer_id == 0, VI::ResultOperationFailed); + + // Allocate memory and space for the shared buffer. + Common::ProcessAddress map_address; + R_TRY(AllocateIoForProcessAddressSpace(std::addressof(map_address), + std::addressof(m_buffer_page_group), m_system, + SharedBufferSize)); + + // Create an nvmap handle for the buffer and assign the memory to it. + R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, map_address, + SharedBufferSize)); + + // Record the display id. + m_display_id = display_id; + + // Create a layer for the display. + m_layer_id = m_flinger.CreateLayer(m_display_id).value(); + + // Set up the buffer. + m_buffer_id = m_next_buffer_id++; + + // Get the layer. + VI::Layer* layer = m_flinger.FindLayer(m_display_id, m_layer_id); + ASSERT(layer != nullptr); + + // Get the producer and set preallocated buffers. + auto& producer = layer->GetBufferQueue(); + MakeGraphicBuffer(producer, 0, m_buffer_nvmap_handle); + MakeGraphicBuffer(producer, 1, m_buffer_nvmap_handle); + + // Assign outputs. + *out_buffer_id = m_buffer_id; + *out_layer_id = m_layer_id; + + // We succeeded. + R_SUCCEED(); +} + +Result FbShareBufferManager::GetSharedBufferMemoryHandleId(u64* out_buffer_size, + s32* out_nvmap_handle, + SharedMemoryPoolLayout* out_pool_layout, + u64 buffer_id, + u64 applet_resource_user_id) { + std::scoped_lock lk{m_guard}; + + R_UNLESS(m_buffer_id > 0, VI::ResultNotFound); + R_UNLESS(buffer_id == m_buffer_id, VI::ResultNotFound); + + *out_pool_layout = SharedBufferPoolLayout; + *out_buffer_size = SharedBufferSize; + *out_nvmap_handle = m_buffer_nvmap_handle; + + R_SUCCEED(); +} + +Result FbShareBufferManager::GetLayerFromId(VI::Layer** out_layer, u64 layer_id) { + // Ensure the layer id is valid. + R_UNLESS(m_layer_id > 0 && layer_id == m_layer_id, VI::ResultNotFound); + + // Get the layer. + VI::Layer* layer = m_flinger.FindLayer(m_display_id, layer_id); + R_UNLESS(layer != nullptr, VI::ResultNotFound); + + // We succeeded. + *out_layer = layer; + R_SUCCEED(); +} + +Result FbShareBufferManager::AcquireSharedFrameBuffer(android::Fence* out_fence, + std::array& out_slot_indexes, + s64* out_target_slot, u64 layer_id) { + std::scoped_lock lk{m_guard}; + + // Get the layer. + VI::Layer* layer; + R_TRY(this->GetLayerFromId(std::addressof(layer), layer_id)); + + // Get the producer. + auto& producer = layer->GetBufferQueue(); + + // Get the next buffer from the producer. + s32 slot; + R_UNLESS(producer.DequeueBuffer(std::addressof(slot), out_fence, SharedBufferAsync != 0, + SharedBufferWidth, SharedBufferHeight, + SharedBufferBlockLinearFormat, 0) == android::Status::NoError, + VI::ResultOperationFailed); + + // Assign remaining outputs. + *out_target_slot = slot; + out_slot_indexes = {0, 1, -1, -1}; + + // We succeeded. + R_SUCCEED(); +} + +Result FbShareBufferManager::PresentSharedFrameBuffer(android::Fence fence, + Common::Rectangle crop_region, + u32 transform, s32 swap_interval, + u64 layer_id, s64 slot) { + std::scoped_lock lk{m_guard}; + + // Get the layer. + VI::Layer* layer; + R_TRY(this->GetLayerFromId(std::addressof(layer), layer_id)); + + // Get the producer. + auto& producer = layer->GetBufferQueue(); + + // Request to queue the buffer. + std::shared_ptr buffer; + R_UNLESS(producer.RequestBuffer(static_cast(slot), std::addressof(buffer)) == + android::Status::NoError, + VI::ResultOperationFailed); + + // Queue the buffer to the producer. + android::QueueBufferInput input{}; + android::QueueBufferOutput output{}; + input.crop = crop_region; + input.fence = fence; + input.transform = static_cast(transform); + input.swap_interval = swap_interval; + R_UNLESS(producer.QueueBuffer(static_cast(slot), input, std::addressof(output)) == + android::Status::NoError, + VI::ResultOperationFailed); + + // We succeeded. + R_SUCCEED(); +} + +Result FbShareBufferManager::GetSharedFrameBufferAcquirableEvent(Kernel::KReadableEvent** out_event, + u64 layer_id) { + std::scoped_lock lk{m_guard}; + + // Get the layer. + VI::Layer* layer; + R_TRY(this->GetLayerFromId(std::addressof(layer), layer_id)); + + // Get the producer. + auto& producer = layer->GetBufferQueue(); + + // Set the event. + *out_event = std::addressof(producer.GetNativeHandle()); + + // We succeeded. + R_SUCCEED(); +} + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h new file mode 100755 index 000000000..618f4bd7f --- /dev/null +++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "core/hle/service/nvnflinger/nvnflinger.h" +#include "core/hle/service/nvnflinger/ui/fence.h" + +namespace Kernel { +class KPageGroup; +} + +namespace Service::Nvnflinger { + +struct SharedMemorySlot { + u64 buffer_offset; + u64 size; + s32 width; + s32 height; +}; +static_assert(sizeof(SharedMemorySlot) == 0x18, "SharedMemorySlot has wrong size"); + +struct SharedMemoryPoolLayout { + s32 num_slots; + SharedMemorySlot slots[0x10]; +}; +static_assert(sizeof(SharedMemoryPoolLayout) == 0x188, "SharedMemoryPoolLayout has wrong size"); + +class FbShareBufferManager final { +public: + explicit FbShareBufferManager(Core::System& system, Nvnflinger& flinger, + std::shared_ptr nvdrv); + ~FbShareBufferManager(); + + Result Initialize(u64* out_buffer_id, u64* out_layer_handle, u64 display_id); + Result GetSharedBufferMemoryHandleId(u64* out_buffer_size, s32* out_nvmap_handle, + SharedMemoryPoolLayout* out_pool_layout, u64 buffer_id, + u64 applet_resource_user_id); + Result AcquireSharedFrameBuffer(android::Fence* out_fence, std::array& out_slots, + s64* out_target_slot, u64 layer_id); + Result PresentSharedFrameBuffer(android::Fence fence, Common::Rectangle crop_region, + u32 transform, s32 swap_interval, u64 layer_id, s64 slot); + Result GetSharedFrameBufferAcquirableEvent(Kernel::KReadableEvent** out_event, u64 layer_id); + +private: + Result GetLayerFromId(VI::Layer** out_layer, u64 layer_id); + +private: + u64 m_next_buffer_id = 1; + u64 m_display_id = 0; + u64 m_buffer_id = 0; + u64 m_layer_id = 0; + u32 m_buffer_nvmap_handle = 0; + SharedMemoryPoolLayout m_pool_layout = {}; + + std::unique_ptr m_buffer_page_group; + + std::mutex m_guard; + Core::System& m_system; + Nvnflinger& m_flinger; + std::shared_ptr m_nvdrv; +}; + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/graphic_buffer_producer.h b/src/core/hle/service/nvnflinger/graphic_buffer_producer.h index 21d7b31f3..5d7cff7d3 100755 --- a/src/core/hle/service/nvnflinger/graphic_buffer_producer.h +++ b/src/core/hle/service/nvnflinger/graphic_buffer_producer.h @@ -19,6 +19,7 @@ class InputParcel; #pragma pack(push, 1) struct QueueBufferInput final { explicit QueueBufferInput(InputParcel& parcel); + explicit QueueBufferInput() = default; void Deflate(s64* timestamp_, bool* is_auto_timestamp_, Common::Rectangle* crop_, NativeWindowScalingMode* scaling_mode_, NativeWindowTransform* transform_, @@ -34,7 +35,6 @@ struct QueueBufferInput final { *fence_ = fence; } -private: s64 timestamp{}; s32 is_auto_timestamp{}; Common::Rectangle crop{}; diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index 21f31f7a0..a07c621d9 100755 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -17,6 +17,7 @@ #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/nvnflinger/buffer_item_consumer.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h" +#include "core/hle/service/nvnflinger/fb_share_buffer_manager.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/nvnflinger/nvnflinger.h" #include "core/hle/service/nvnflinger/ui/graphic_buffer.h" @@ -331,4 +332,14 @@ s64 Nvnflinger::GetNextTicks() const { return static_cast(speed_scale * (1000000000.f / effective_fps)); } +FbShareBufferManager& Nvnflinger::GetSystemBufferManager() { + const auto lock_guard = Lock(); + + if (!system_buffer_manager) { + system_buffer_manager = std::make_unique(system, *this, nvdrv); + } + + return *system_buffer_manager; +} + } // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/nvnflinger.h b/src/core/hle/service/nvnflinger/nvnflinger.h index f478c2bc6..14c783582 100755 --- a/src/core/hle/service/nvnflinger/nvnflinger.h +++ b/src/core/hle/service/nvnflinger/nvnflinger.h @@ -45,6 +45,9 @@ class BufferQueueProducer; namespace Service::Nvnflinger { +class FbShareBufferManager; +class HosBinderDriverServer; + class Nvnflinger final { public: explicit Nvnflinger(Core::System& system_, HosBinderDriverServer& hos_binder_driver_server_); @@ -90,12 +93,16 @@ public: [[nodiscard]] s64 GetNextTicks() const; + FbShareBufferManager& GetSystemBufferManager(); + private: struct Layer { std::unique_ptr core; std::unique_ptr producer; }; + friend class FbShareBufferManager; + private: [[nodiscard]] std::unique_lock Lock() const { return std::unique_lock{*guard}; @@ -140,6 +147,8 @@ private: std::shared_ptr multi_composition_event; std::shared_ptr single_composition_event; + std::unique_ptr system_buffer_manager; + std::shared_ptr guard; Core::System& system; diff --git a/src/core/hle/service/nvnflinger/ui/fence.h b/src/core/hle/service/nvnflinger/ui/fence.h index 536e8156d..177aed758 100755 --- a/src/core/hle/service/nvnflinger/ui/fence.h +++ b/src/core/hle/service/nvnflinger/ui/fence.h @@ -20,6 +20,9 @@ public: static constexpr Fence NoFence() { Fence fence; fence.fences[0].id = -1; + fence.fences[1].id = -1; + fence.fences[2].id = -1; + fence.fences[3].id = -1; return fence; } diff --git a/src/core/hle/service/nvnflinger/ui/graphic_buffer.h b/src/core/hle/service/nvnflinger/ui/graphic_buffer.h index 75d1705a8..3eac5cedd 100755 --- a/src/core/hle/service/nvnflinger/ui/graphic_buffer.h +++ b/src/core/hle/service/nvnflinger/ui/graphic_buffer.h @@ -12,8 +12,7 @@ namespace Service::android { -class GraphicBuffer final { -public: +struct GraphicBuffer final { constexpr GraphicBuffer() = default; constexpr GraphicBuffer(u32 width_, u32 height_, PixelFormat format_, u32 usage_) @@ -77,7 +76,6 @@ public: return false; } -private: u32 magic{}; s32 width{}; s32 height{}; diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index aba6259b2..231fd4e0d 100755 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -20,9 +20,12 @@ #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/service/ipc_helpers.h" +#include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/hle/service/nvdrv/nvdata.h" +#include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/nvnflinger/binder.h" #include "core/hle/service/nvnflinger/buffer_queue_producer.h" +#include "core/hle/service/nvnflinger/fb_share_buffer_manager.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/nvnflinger/nvnflinger.h" #include "core/hle/service/nvnflinger/parcel.h" @@ -131,8 +134,9 @@ private: class ISystemDisplayService final : public ServiceFramework { public: - explicit ISystemDisplayService(Core::System& system_) - : ServiceFramework{system_, "ISystemDisplayService"} { + explicit ISystemDisplayService(Core::System& system_, Nvnflinger::Nvnflinger& nvnflinger_) + : ServiceFramework{system_, "ISystemDisplayService"}, nvnflinger{nvnflinger_} { + // clang-format off static const FunctionInfo functions[] = { {1200, nullptr, "GetZOrderCountMin"}, {1202, nullptr, "GetZOrderCountMax"}, @@ -170,22 +174,126 @@ public: {3217, nullptr, "SetDisplayCmuLuma"}, {3218, nullptr, "SetDisplayCrcMode"}, {6013, nullptr, "GetLayerPresentationSubmissionTimestamps"}, - {8225, nullptr, "GetSharedBufferMemoryHandleId"}, - {8250, nullptr, "OpenSharedLayer"}, + {8225, &ISystemDisplayService::GetSharedBufferMemoryHandleId, "GetSharedBufferMemoryHandleId"}, + {8250, &ISystemDisplayService::OpenSharedLayer, "OpenSharedLayer"}, {8251, nullptr, "CloseSharedLayer"}, - {8252, nullptr, "ConnectSharedLayer"}, + {8252, &ISystemDisplayService::ConnectSharedLayer, "ConnectSharedLayer"}, {8253, nullptr, "DisconnectSharedLayer"}, - {8254, nullptr, "AcquireSharedFrameBuffer"}, - {8255, nullptr, "PresentSharedFrameBuffer"}, - {8256, nullptr, "GetSharedFrameBufferAcquirableEvent"}, + {8254, &ISystemDisplayService::AcquireSharedFrameBuffer, "AcquireSharedFrameBuffer"}, + {8255, &ISystemDisplayService::PresentSharedFrameBuffer, "PresentSharedFrameBuffer"}, + {8256, &ISystemDisplayService::GetSharedFrameBufferAcquirableEvent, "GetSharedFrameBufferAcquirableEvent"}, {8257, nullptr, "FillSharedFrameBufferColor"}, {8258, nullptr, "CancelSharedFrameBuffer"}, {9000, nullptr, "GetDp2hdmiController"}, }; + // clang-format on RegisterHandlers(functions); } private: + void GetSharedBufferMemoryHandleId(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const u64 buffer_id = rp.PopRaw(); + + LOG_INFO(Service_VI, "called. buffer_id={:#x}", buffer_id); + + struct OutputParameters { + s32 nvmap_handle; + u64 size; + }; + + OutputParameters out{}; + Nvnflinger::SharedMemoryPoolLayout layout{}; + const auto result = nvnflinger.GetSystemBufferManager().GetSharedBufferMemoryHandleId( + &out.size, &out.nvmap_handle, &layout, buffer_id, 0); + + ctx.WriteBuffer(&layout, sizeof(layout)); + + IPC::ResponseBuilder rb{ctx, 6}; + rb.Push(result); + rb.PushRaw(out); + } + + void OpenSharedLayer(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const u64 layer_id = rp.PopRaw(); + + LOG_INFO(Service_VI, "(STUBBED) called. layer_id={:#x}", layer_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } + + void ConnectSharedLayer(HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const u64 layer_id = rp.PopRaw(); + + LOG_INFO(Service_VI, "(STUBBED) called. layer_id={:#x}", layer_id); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); + } + + void GetSharedFrameBufferAcquirableEvent(HLERequestContext& ctx) { + LOG_DEBUG(Service_VI, "called"); + + IPC::RequestParser rp{ctx}; + const u64 layer_id = rp.PopRaw(); + + Kernel::KReadableEvent* event{}; + const auto result = nvnflinger.GetSystemBufferManager().GetSharedFrameBufferAcquirableEvent( + &event, layer_id); + + IPC::ResponseBuilder rb{ctx, 2, 1}; + rb.Push(result); + rb.PushCopyObjects(event); + } + + void AcquireSharedFrameBuffer(HLERequestContext& ctx) { + LOG_DEBUG(Service_VI, "called"); + + IPC::RequestParser rp{ctx}; + const u64 layer_id = rp.PopRaw(); + + struct OutputParameters { + android::Fence fence; + std::array slots; + s64 target_slot; + }; + static_assert(sizeof(OutputParameters) == 0x40, "OutputParameters has wrong size"); + + OutputParameters out{}; + const auto result = nvnflinger.GetSystemBufferManager().AcquireSharedFrameBuffer( + &out.fence, out.slots, &out.target_slot, layer_id); + + IPC::ResponseBuilder rb{ctx, 18}; + rb.Push(result); + rb.PushRaw(out); + } + + void PresentSharedFrameBuffer(HLERequestContext& ctx) { + LOG_DEBUG(Service_VI, "called"); + + struct InputParameters { + android::Fence fence; + Common::Rectangle crop_region; + u32 window_transform; + s32 swap_interval; + u64 layer_id; + s64 surface_id; + }; + static_assert(sizeof(InputParameters) == 0x50, "InputParameters has wrong size"); + + IPC::RequestParser rp{ctx}; + auto input = rp.PopRaw(); + + const auto result = nvnflinger.GetSystemBufferManager().PresentSharedFrameBuffer( + input.fence, input.crop_region, input.window_transform, input.swap_interval, + input.layer_id, input.surface_id); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(result); + } + void SetLayerZ(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const u64 layer_id = rp.Pop(); @@ -228,6 +336,9 @@ private: rb.PushRaw(60.0f); // This wouldn't seem to be correct for 30 fps games. rb.Push(0); } + +private: + Nvnflinger::Nvnflinger& nvnflinger; }; class IManagerDisplayService final : public ServiceFramework { @@ -453,7 +564,7 @@ private: IPC::ResponseBuilder rb{ctx, 2, 0, 1}; rb.Push(ResultSuccess); - rb.PushIpcInterface(system); + rb.PushIpcInterface(system, nv_flinger); } void GetManagerDisplayService(HLERequestContext& ctx) { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 28222509f..78742ad5d 100755 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(video_core STATIC buffer_cache/buffer_cache.cpp buffer_cache/buffer_cache.h buffer_cache/memory_tracker_base.h + buffer_cache/usage_tracker.h buffer_cache/word_manager.h cache_types.h cdma_pusher.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9f7d72762..08447a4ec 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -67,6 +67,7 @@ void BufferCache

::TickFrame() { if (!channel_state) { return; } + runtime.TickFrame(slot_buffers); // Calculate hits and shots and move hit bits to the right const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), @@ -230,7 +231,10 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am for (const IntervalType& add_interval : tmp_intervals) { common_ranges.add(add_interval); } - runtime.CopyBuffer(dest_buffer, src_buffer, copies); + const auto& copy = copies[0]; + src_buffer.MarkUsage(copy.src_offset, copy.size); + dest_buffer.MarkUsage(copy.dst_offset, copy.size); + runtime.CopyBuffer(dest_buffer, src_buffer, copies, true); if (has_new_downloads) { memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); } @@ -258,9 +262,10 @@ bool BufferCache

::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { common_ranges.subtract(subtract_interval); const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast(size)); - auto& dest_buffer = slot_buffers[buffer]; + Buffer& dest_buffer = slot_buffers[buffer]; const u32 offset = dest_buffer.Offset(*cpu_dst_address); runtime.ClearBuffer(dest_buffer, offset, size, value); + dest_buffer.MarkUsage(offset, size); return true; } @@ -603,6 +608,7 @@ void BufferCache

::CommitAsyncFlushesHigh() { VAddr orig_cpu_addr = static_cast(second_copy.src_offset); const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; async_downloads += std::make_pair(base_interval, 1); + buffer.MarkUsage(copy.src_offset, copy.size); runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); normalized_copies.push_back(second_copy); } @@ -621,8 +627,9 @@ void BufferCache

::CommitAsyncFlushesHigh() { // Have in mind the staging buffer offset for the copy copy.dst_offset += download_staging.offset; const std::array copies{copy}; - runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, - false); + Buffer& buffer = slot_buffers[buffer_id]; + buffer.MarkUsage(copy.src_offset, copy.size); + runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); } runtime.PostCopyBarrier(); runtime.Finish(); @@ -742,7 +749,7 @@ void BufferCache

::BindHostIndexBuffer() { {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; std::memcpy(upload_staging.mapped_span.data(), draw_state.inline_index_draw_indexes.data(), size); - runtime.CopyBuffer(buffer, upload_staging.buffer, copies); + runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true); } else { buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); } @@ -754,6 +761,7 @@ void BufferCache

::BindHostIndexBuffer() { offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes(); runtime.BindIndexBuffer(buffer, new_offset, size); } else { + buffer.MarkUsage(offset, size); runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format, draw_state.index_buffer.first, draw_state.index_buffer.count, buffer, offset, size); @@ -790,6 +798,7 @@ void BufferCache

::BindHostVertexBuffers() { const u32 stride = maxwell3d->regs.vertex_streams[index].stride; const u32 offset = buffer.Offset(binding.cpu_addr); + buffer.MarkUsage(offset, binding.size); host_bindings.buffers.push_back(&buffer); host_bindings.offsets.push_back(offset); @@ -895,6 +904,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; } + buffer.MarkUsage(offset, size); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { @@ -913,6 +923,7 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); + buffer.MarkUsage(offset, size); const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); @@ -933,6 +944,7 @@ void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; + buffer.MarkUsage(offset, size); if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { runtime.BindImageBuffer(buffer, offset, size, format); @@ -963,9 +975,10 @@ void BufferCache

::BindHostTransformFeedbackBuffers() { SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); + buffer.MarkUsage(offset, size); host_bindings.buffers.push_back(&buffer); host_bindings.offsets.push_back(offset); - host_bindings.sizes.push_back(binding.size); + host_bindings.sizes.push_back(size); } if (host_bindings.buffers.size() > 0) { runtime.BindTransformFeedbackBuffers(host_bindings); @@ -989,6 +1002,7 @@ void BufferCache

::BindHostComputeUniformBuffers() { SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); + buffer.MarkUsage(offset, size); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); ++binding_index; @@ -1009,6 +1023,7 @@ void BufferCache

::BindHostComputeStorageBuffers() { SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); + buffer.MarkUsage(offset, size); const bool is_written = ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; if constexpr (NEEDS_BIND_STORAGE_INDEX) { @@ -1030,6 +1045,7 @@ void BufferCache

::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; + buffer.MarkUsage(offset, size); if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { runtime.BindImageBuffer(buffer, offset, size, format); @@ -1148,10 +1164,11 @@ void BufferCache

::UpdateVertexBuffer(u32 index) { if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { size = static_cast(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); } + const BufferId buffer_id = FindBuffer(*cpu_addr, size); channel_state->vertex_buffers[index] = Binding{ .cpu_addr = *cpu_addr, .size = size, - .buffer_id = FindBuffer(*cpu_addr, size), + .buffer_id = buffer_id, }; } @@ -1403,7 +1420,8 @@ void BufferCache

::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, .dst_offset = dst_base_offset, .size = overlap.SizeBytes(), }); - runtime.CopyBuffer(new_buffer, overlap, copies); + new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size); + runtime.CopyBuffer(new_buffer, overlap, copies, true); DeleteBuffer(overlap_id, true); } @@ -1416,7 +1434,9 @@ BufferId BufferCache

::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { const u32 size = static_cast(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); auto& new_buffer = slot_buffers[new_buffer_id]; - runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0); + const size_t size_bytes = new_buffer.SizeBytes(); + runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); + new_buffer.MarkUsage(0, size_bytes); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } @@ -1469,11 +1489,6 @@ void BufferCache

::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { template bool BufferCache

::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { - return SynchronizeBufferImpl(buffer, cpu_addr, size); -} - -template -bool BufferCache

::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { boost::container::small_vector copies; u64 total_size_bytes = 0; u64 largest_copy = 0; @@ -1495,51 +1510,6 @@ bool BufferCache

::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s return false; } -template -bool BufferCache

::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { - boost::container::small_vector copies; - u64 total_size_bytes = 0; - u64 largest_copy = 0; - IntervalSet found_sets{}; - auto make_copies = [&] { - for (auto& interval : found_sets) { - const std::size_t sub_size = interval.upper() - interval.lower(); - const VAddr cpu_addr_ = interval.lower(); - copies.push_back(BufferCopy{ - .src_offset = total_size_bytes, - .dst_offset = cpu_addr_ - buffer.CpuAddr(), - .size = sub_size, - }); - total_size_bytes += sub_size; - largest_copy = std::max(largest_copy, sub_size); - } - const std::span copies_span(copies.data(), copies.size()); - UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); - }; - memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { - const VAddr base_adr = cpu_addr_out; - const VAddr end_adr = base_adr + range_size; - const IntervalType add_interval{base_adr, end_adr}; - found_sets.add(add_interval); - }); - if (found_sets.empty()) { - return true; - } - const IntervalType search_interval{cpu_addr, cpu_addr + size}; - auto it = common_ranges.lower_bound(search_interval); - auto it_end = common_ranges.upper_bound(search_interval); - if (it == common_ranges.end()) { - make_copies(); - return false; - } - while (it != it_end) { - found_sets.subtract(*it); - it++; - } - make_copies(); - return false; -} - template void BufferCache

::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span copies) { @@ -1588,7 +1558,8 @@ void BufferCache

::MappedUploadMemory([[maybe_unused]] Buffer& buffer, // Apply the staging offset copy.src_offset += upload_staging.offset; } - runtime.CopyBuffer(buffer, upload_staging.buffer, copies); + const bool can_reorder = runtime.CanReorderUpload(buffer, copies); + runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder); } } @@ -1630,7 +1601,8 @@ void BufferCache

::InlineMemoryImplementation(VAddr dest_address, size_t copy_ }}; u8* const src_pointer = upload_staging.mapped_span.data(); std::memcpy(src_pointer, inlined_buffer.data(), copy_size); - runtime.CopyBuffer(buffer, upload_staging.buffer, copies); + const bool can_reorder = runtime.CanReorderUpload(buffer, copies); + runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder); } else { buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); } @@ -1683,8 +1655,9 @@ void BufferCache

::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si for (BufferCopy& copy : copies) { // Modify copies to have the staging offset in mind copy.dst_offset += download_staging.offset; + buffer.MarkUsage(copy.src_offset, copy.size); } - runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); + runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); runtime.Finish(); for (const BufferCopy& copy : copies) { const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c4f6e8d12..9ff8a5080 100755 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -525,10 +525,6 @@ private: bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); - bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); - - bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); - void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, std::span copies); diff --git a/src/video_core/buffer_cache/usage_tracker.h b/src/video_core/buffer_cache/usage_tracker.h new file mode 100755 index 000000000..ab05fe415 --- /dev/null +++ b/src/video_core/buffer_cache/usage_tracker.h @@ -0,0 +1,79 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common/alignment.h" +#include "common/common_types.h" + +namespace VideoCommon { + +class UsageTracker { + static constexpr size_t BYTES_PER_BIT_SHIFT = 6; + static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT; + static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT; + +public: + explicit UsageTracker(size_t size) { + const size_t num_pages = (size >> PAGE_SHIFT) + 1; + pages.resize(num_pages, 0ULL); + } + + void Reset() noexcept { + std::ranges::fill(pages, 0ULL); + } + + void Track(u64 offset, u64 size) noexcept { + const size_t page = offset >> PAGE_SHIFT; + const size_t page_end = (offset + size) >> PAGE_SHIFT; + TrackPage(page, offset, size); + if (page == page_end) { + return; + } + for (size_t i = page + 1; i < page_end; i++) { + pages[i] = ~u64{0}; + } + const size_t offset_end = offset + size; + const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES); + TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned); + } + + [[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept { + const size_t page = offset >> PAGE_SHIFT; + const size_t page_end = (offset + size) >> PAGE_SHIFT; + if (IsPageUsed(page, offset, size)) { + return true; + } + for (size_t i = page + 1; i < page_end; i++) { + if (pages[i] != 0) { + return true; + } + } + const size_t offset_end = offset + size; + const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES); + return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned); + } + +private: + void TrackPage(u64 page, u64 offset, u64 size) noexcept { + const size_t offset_in_page = offset % PAGE_BYTES; + const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT; + const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT; + const size_t mask = ~u64{0} >> (64 - num_bits); + pages[page] |= (~u64{0} & mask) << first_bit; + } + + bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept { + const size_t offset_in_page = offset % PAGE_BYTES; + const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT; + const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT; + const size_t mask = ~u64{0} >> (64 - num_bits); + const size_t mask2 = (~u64{0} & mask) << first_bit; + return (pages[page] & mask2) != 0; + } + +private: + std::vector pages; +}; + +} // namespace VideoCommon diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index fc6f5b2d5..a0f63cfef 100755 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, } void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, - std::span copies, bool barrier) { + std::span copies, bool barrier, + bool) { CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); } void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, - std::span copies) { - CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); + std::span copies, bool) { + CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true); } void BufferCacheRuntime::PreCopyBarrier() { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 667b8e5cc..17613e70a 100755 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -30,6 +30,8 @@ public: void MakeResident(GLenum access) noexcept; + void MarkUsage(u64 offset, u64 size) {} + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { @@ -66,22 +68,29 @@ public: [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); + bool CanReorderUpload(const Buffer&, std::span) { + return false; + } + void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, - std::span copies, bool barrier = true); + std::span copies, bool barrier); void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, - std::span copies, bool barrier = true); + std::span copies, bool barrier); void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, - std::span copies, bool barrier = true); + std::span copies, bool barrier, + bool can_reorder_upload = false); void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, - std::span copies); + std::span copies, bool); void PreCopyBarrier(); void PostCopyBarrier(); void Finish(); + void TickFrame(VideoCommon::SlotVector&) noexcept {} + void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 7703a2170..5e91d0362 100755 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -454,7 +454,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, - src_view](vk::CommandBuffer cmdbuf) { + src_view](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { // TODO: Barriers const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); @@ -479,7 +479,8 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region, - src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) { + src_region, src_size, pipeline, + layout](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL); BeginRenderPass(cmdbuf, dst_framebuffer); const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); @@ -512,7 +513,7 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, - src_stencil_view, this](vk::CommandBuffer cmdbuf) { + src_stencil_view, this](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { // TODO: Barriers const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, @@ -581,17 +582,17 @@ void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_ma const VkPipeline pipeline = FindOrEmplaceClearColorPipeline(key); const VkPipelineLayout layout = *clear_color_pipeline_layout; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record( - [pipeline, layout, color_mask, clear_color, dst_region](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - const std::array blend_color = { - (color_mask & 0x1) ? 1.0f : 0.0f, (color_mask & 0x2) ? 1.0f : 0.0f, - (color_mask & 0x4) ? 1.0f : 0.0f, (color_mask & 0x8) ? 1.0f : 0.0f}; - cmdbuf.SetBlendConstants(blend_color.data()); - BindBlitState(cmdbuf, dst_region); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_color); - cmdbuf.Draw(3, 1, 0, 0); - }); + scheduler.Record([pipeline, layout, color_mask, clear_color, + dst_region](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + const std::array blend_color = { + (color_mask & 0x1) ? 1.0f : 0.0f, (color_mask & 0x2) ? 1.0f : 0.0f, + (color_mask & 0x4) ? 1.0f : 0.0f, (color_mask & 0x8) ? 1.0f : 0.0f}; + cmdbuf.SetBlendConstants(blend_color.data()); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_color); + cmdbuf.Draw(3, 1, 0, 0); + }); scheduler.InvalidateState(); } @@ -608,14 +609,13 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key); const VkPipelineLayout layout = *clear_color_pipeline_layout; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) { - constexpr std::array blend_constants{0.0f, 0.0f, 0.0f, 0.0f}; - cmdbuf.SetBlendConstants(blend_constants.data()); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - BindBlitState(cmdbuf, dst_region); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); - cmdbuf.Draw(3, 1, 0, 0); - }); + scheduler.Record( + [pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); + cmdbuf.Draw(3, 1, 0, 0); + }); scheduler.InvalidateState(); } @@ -627,7 +627,8 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const VkExtent2D extent = GetConversionExtent(src_image_view); scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -673,7 +674,7 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, - this](vk::CommandBuffer cmdbuf) { + this](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -867,7 +868,7 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline( .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .depthTestEnable = key.depth_clear, + .depthTestEnable = VK_FALSE, .depthWriteEnable = key.depth_clear, .depthCompareOp = VK_COMPARE_OP_ALWAYS, .depthBoundsTestEnable = VK_FALSE, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index fd013e35c..e29e1f8f3 100755 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -239,7 +239,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([&](vk::CommandBuffer cmdbuf) { + scheduler.Record([&](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 6b4e96770..c0bc2333c 100755 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -201,43 +201,44 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, .depth = 1, }, }; - scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) { - const VkImage image = *raw_images[index]; - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + scheduler.Record( + [this, copy, index = image_index](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + const VkImage image = *raw_images[index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - read_barrier); - cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, write_barrier); - }); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, write_barrier); + }); } const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); @@ -250,7 +251,7 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, .height = (up_scale * framebuffer.height) >> down_shift, }; scheduler.Record([this, index = image_index, size, - anti_alias_pass](vk::CommandBuffer cmdbuf) { + anti_alias_pass](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkImageMemoryBarrier base_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -375,7 +376,7 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, } scheduler.Record([this, host_framebuffer, index = image_index, - size = render_area](vk::CommandBuffer cmdbuf) { + size = render_area](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 65d130d3b..7c7cb2005 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) - : VideoCommon::BufferBase(null_params) {} + : VideoCommon::BufferBase(null_params), tracker{4096} {} Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), - device{&runtime.device}, buffer{ - CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { + device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, + tracker{SizeBytes()} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } @@ -179,7 +179,8 @@ public: if (!host_visible) { scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, - dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { + dst_buffer = *buffer, + size_bytes](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkBufferCopy copy{ .srcOffset = src_offset, .dstOffset = 0, @@ -210,9 +211,10 @@ public: const size_t sub_first_offset = static_cast(first % 4) * GetQuadsNum(num_indices); const size_t offset = (sub_first_offset + GetQuadsNum(first)) * 6ULL * BytesPerIndex(index_type); - scheduler.Record([buffer_ = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(buffer_, offset, index_type_); - }); + scheduler.Record( + [buffer_ = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.BindIndexBuffer(buffer_, offset, index_type_); + }); } protected: @@ -355,12 +357,31 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { return device.CanReportMemoryUsage(); } +void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector& slot_buffers) noexcept { + for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) { + it->ResetUsageTracking(); + } +} + void BufferCacheRuntime::Finish() { scheduler.Finish(); } +bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer, + std::span copies) { + if (Settings::values.disable_buffer_reorder) { + return false; + } + const bool can_use_upload_cmdbuf = + std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) { + return !buffer.IsRegionUsed(copy.dst_offset, copy.size); + }); + return can_use_upload_cmdbuf; +} + void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, - std::span copies, bool barrier) { + std::span copies, bool barrier, + bool can_reorder_upload) { if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { return; } @@ -376,21 +397,31 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; + // Measuring a popular game, this number never exceeds the specified size once data is warmed up boost::container::small_vector vk_copies(copies.size()); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); + if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) { + scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer, + vk::CommandBuffer upload_cmdbuf) { + upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + }); + return; + } + scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { - if (barrier) { - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); - } - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); - if (barrier) { - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); - } - }); + scheduler.Record( + [src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + if (barrier) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); + } + cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + if (barrier) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); + } + }); } void BufferCacheRuntime::PreCopyBarrier() { @@ -401,7 +432,7 @@ void BufferCacheRuntime::PreCopyBarrier() { .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, }; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([](vk::CommandBuffer cmdbuf) { + scheduler.Record([](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); }); @@ -415,7 +446,7 @@ void BufferCacheRuntime::PostCopyBarrier() { .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([](vk::CommandBuffer cmdbuf) { + scheduler.Record([](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); }); @@ -439,13 +470,14 @@ void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t si }; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([dest_buffer, offset, size, value](vk::CommandBuffer cmdbuf) { - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, READ_BARRIER); - cmdbuf.FillBuffer(dest_buffer, offset, size, value); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, WRITE_BARRIER); - }); + scheduler.Record( + [dest_buffer, offset, size, value](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); + cmdbuf.FillBuffer(dest_buffer, offset, size, value); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); + }); } void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, @@ -470,15 +502,16 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat ReserveNullBuffer(); vk_buffer = *null_buffer; } - scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type); - }); + scheduler.Record( + [vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type); + }); } void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count) { if (count == 0) { ReserveNullBuffer(); - scheduler.Record([this](vk::CommandBuffer cmdbuf) { + scheduler.Record([this](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.BindIndexBuffer(*null_buffer, 0, VK_INDEX_TYPE_UINT32); }); return; @@ -499,19 +532,20 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset return; } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { - const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0; - const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE; - const VkDeviceSize vk_stride = stride; - cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); - }); + scheduler.Record( + [index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0; + const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE; + const VkDeviceSize vk_stride = stride; + cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); + }); } else { if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) { ReserveNullBuffer(); buffer = *null_buffer; offset = 0; } - scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { + scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.BindVertexBuffer(index, buffer, offset); }); } @@ -533,7 +567,8 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bi } if (device.IsExtExtendedDynamicStateSupported()) { scheduler.Record([this, bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, std::min(bindings_.max_index - bindings_.min_index, device.GetMaxVertexInputBindings()), @@ -542,7 +577,8 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bi }); } else { scheduler.Record([this, bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { cmdbuf.BindVertexBuffers(bindings_.min_index, std::min(bindings_.max_index - bindings_.min_index, device.GetMaxVertexInputBindings()), @@ -565,7 +601,7 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, offset = 0; size = 0; } - scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) { + scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkDeviceSize vk_offset = offset; const VkDeviceSize vk_size = size; cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size); @@ -581,8 +617,8 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings< for (u32 index = 0; index < bindings.buffers.size(); ++index) { buffer_handles.push_back(bindings.buffers[index]->Handle()); } - scheduler.Record([bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + scheduler.Record([bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles)]( + vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast(buffer_handles_.size()), buffer_handles_.data(), bindings_.offsets.data(), bindings_.sizes.data()); @@ -613,7 +649,7 @@ void BufferCacheRuntime::ReserveNullBuffer() { } scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0); }); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 96c75242e..a2806d89f 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -5,6 +5,7 @@ #include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/memory_tracker_base.h" +#include "video_core/buffer_cache/usage_tracker.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" @@ -34,6 +35,18 @@ public: return *buffer; } + [[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept { + return tracker.IsUsed(offset, size); + } + + void MarkUsage(u64 offset, u64 size) noexcept { + tracker.Track(offset, size); + } + + void ResetUsageTracking() noexcept { + tracker.Reset(); + } + operator VkBuffer() const noexcept { return *buffer; } @@ -49,6 +62,7 @@ private: const Device* device{}; vk::Buffer buffer; std::vector views; + VideoCommon::UsageTracker tracker; }; class QuadArrayIndexBuffer; @@ -67,6 +81,8 @@ public: ComputePassDescriptorQueue& compute_pass_descriptor_queue, DescriptorPool& descriptor_pool); + void TickFrame(VideoCommon::SlotVector& slot_buffers) noexcept; + void Finish(); u64 GetDeviceLocalMemory() const; @@ -79,12 +95,15 @@ public: [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); + bool CanReorderUpload(const Buffer& buffer, std::span copies); + void FreeDeferredStagingBuffer(StagingBufferRef& ref); void PreCopyBarrier(); void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, - std::span copies, bool barrier = true); + std::span copies, bool barrier, + bool can_reorder_upload = false); void PostCopyBarrier(); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 81c8b5f17..84ed2d383 100755 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -324,22 +324,23 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { - static constexpr u32 DISPATCH_SIZE = 1024; - static constexpr VkMemoryBarrier WRITE_BARRIER{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, - }; - const VkDescriptorSet set = descriptor_allocator.Commit(); - device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); - cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); - }); + scheduler.Record( + [this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + static constexpr u32 DISPATCH_SIZE = 1024; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + }; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); + }); return {staging.buffer, staging.offset}; } @@ -383,7 +384,7 @@ std::pair QuadIndexedPass::Assemble( scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift, - is_strip](vk::CommandBuffer cmdbuf) { + is_strip](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -423,7 +424,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { static constexpr VkMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -483,7 +484,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([this, descriptor_data, min_accumulation_limit, max_accumulation_limit, - runs_to_do, used_offset](vk::CommandBuffer cmdbuf) { + runs_to_do, used_offset](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { static constexpr VkMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -548,8 +549,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const VkImageAspectFlags aspect_mask = image.AspectMask(); const VkImage vk_image = image.Handle(); const bool is_initialized = image.ExchangeInitialization(); - scheduler.Record([vk_pipeline, vk_image, aspect_mask, - is_initialized](vk::CommandBuffer cmdbuf) { + scheduler.Record([vk_pipeline, vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { const VkImageMemoryBarrier image_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -592,7 +593,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, ASSERT(params.destination == (std::array{0, 0, 0})); ASSERT(params.bytes_per_block_log2 == 4); scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, - params, descriptor_data](vk::CommandBuffer cmdbuf) { + params, descriptor_data](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, .layer_stride = params.layer_stride, @@ -608,7 +609,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); }); } - scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) { + scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkImageMemoryBarrier image_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -699,7 +700,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image, }; scheduler.Record([this, dst = dst_image.Handle(), msaa_pipeline, num_dispatches, - descriptor_data](vk::CommandBuffer cmdbuf) { + descriptor_data](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkDescriptorSet set = descriptor_allocator.Commit(); device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, msaa_pipeline); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 42a3e8fae..9ab3161f7 100755 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -199,15 +199,15 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built - scheduler.Record([this](vk::CommandBuffer) { + scheduler.Record([this](vk::CommandBuffer, vk::CommandBuffer) { std::unique_lock lock{build_mutex}; build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } const void* const descriptor_data{guest_descriptor_queue.UpdateData()}; const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty(); - scheduler.Record([this, descriptor_data, is_rescaling, - rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, descriptor_data, is_rescaling, rescaling_data = rescaling.Data()]( + vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); if (!descriptor_set_layout) { return; diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index e0bf4e679..029100e6f 100755 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -38,7 +38,8 @@ VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView imag UpdateDescriptorSet(image_index, image_view); - scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { const VkImageMemoryBarrier base_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0f82316a3..83604690a 100755 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -493,7 +493,7 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built - scheduler.Record([this](vk::CommandBuffer) { + scheduler.Record([this](vk::CommandBuffer, vk::CommandBuffer) { std::unique_lock lock{build_mutex}; build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); @@ -502,42 +502,43 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)}; const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{guest_descriptor_queue.UpdateData()}; - scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), - is_rescaling, update_rescaling, - uses_render_area = render_area.uses_render_area, - render_area_data = render_area.words](vk::CommandBuffer cmdbuf) { - if (bind_pipeline) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); - } - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, - RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), - rescaling_data.data()); - if (update_rescaling) { - const f32 config_down_factor{Settings::values.resolution_info.down_factor}; - const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; + scheduler.Record( + [this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), is_rescaling, + update_rescaling, uses_render_area = render_area.uses_render_area, + render_area_data = render_area.words](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + if (bind_pipeline) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + } cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, - RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor), - &scale_down_factor); - } - if (uses_render_area) { - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, - RENDERAREA_LAYOUT_OFFSET, sizeof(render_area_data), - &render_area_data); - } - if (!descriptor_set_layout) { - return; - } - if (uses_push_descriptor) { - cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, - 0, descriptor_data); - } else { - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - const vk::Device& dev{device.GetLogical()}; - dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_set, nullptr); - } - }); + RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); + if (update_rescaling) { + const f32 config_down_factor{Settings::values.resolution_info.down_factor}; + const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor), + &scale_down_factor); + } + if (uses_render_area) { + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RENDERAREA_LAYOUT_OFFSET, sizeof(render_area_data), + &render_area_data); + } + if (!descriptor_set_layout) { + return; + } + if (uses_push_descriptor) { + cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, + *pipeline_layout, 0, descriptor_data); + } else { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, + descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); + } + }); } void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index d250b4181..3a04b10e2 100755 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) { Refresh(); } -VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick) { +VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, + u64 host_tick) { if (semaphore) { - return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); + return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, + host_tick); } else { - return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); + return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick); } } @@ -115,6 +117,7 @@ static constexpr std::array wait_stage_masks{ }; VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, + vk::CommandBuffer& upload_cmdbuf, VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick) { const VkSemaphore timeline_semaphore = *semaphore; @@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, const std::array signal_values{host_tick, u64(0)}; const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf}; + const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; const VkTimelineSemaphoreSubmitInfo timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, @@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, .waitSemaphoreCount = num_wait_semaphores, .pWaitSemaphores = &wait_semaphore, .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1, - .pCommandBuffers = cmdbuf.address(), + .commandBufferCount = static_cast(cmdbuffers.size()), + .pCommandBuffers = cmdbuffers.data(), .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = signal_semaphores.data(), }; @@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, return device.GetGraphicsQueue().Submit(submit_info); } -VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick) { +VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, + vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, + u64 host_tick) { const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; + const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf}; + const VkSubmitInfo submit_info{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = nullptr, .waitSemaphoreCount = num_wait_semaphores, .pWaitSemaphores = &wait_semaphore, .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = 1, - .pCommandBuffers = cmdbuf.address(), + .commandBufferCount = static_cast(cmdbuffers.size()), + .pCommandBuffers = cmdbuffers.data(), .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = &signal_semaphore, }; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ade2882ae..356d2e346 100755 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -52,14 +52,16 @@ public: void Wait(u64 tick); /// Submits the device graphics queue, updating the tick as necessary - VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick); + VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick); private: - VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick); - VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, - VkSemaphore wait_semaphore, u64 host_tick); + VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, + u64 host_tick); + VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, + VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, + u64 host_tick); void WaitThread(std::stop_token token); diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index d681bd22a..752723352 100755 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -166,7 +166,7 @@ void PresentManager::Present(Frame* frame) { return; } - scheduler.Record([this, frame](vk::CommandBuffer) { + scheduler.Record([this, frame](vk::CommandBuffer, vk::CommandBuffer) { std::unique_lock lock{queue_mutex}; present_queue.push(frame); frame_cv.notify_one(); @@ -488,4 +488,4 @@ void PresentManager::CopyToSwapchain(Frame* frame) { swapchain.Present(render_semaphore); } -} // namespace Vulkan \ No newline at end of file +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 13cb4b28e..30926e269 100755 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -138,9 +138,10 @@ public: }; accumulation_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf) { - cmdbuf.FillBuffer(buffer, 0, 8, 0); - }); + scheduler.Record( + [buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.FillBuffer(buffer, 0, 8, 0); + }); } ~SamplesStreamer() = default; @@ -150,8 +151,8 @@ public: return; } ReserveHostQuery(); - scheduler.Record([query_pool = current_query_pool, - query_index = current_bank_slot](vk::CommandBuffer cmdbuf) { + scheduler.Record([query_pool = current_query_pool, query_index = current_bank_slot]( + vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const bool use_precise = Settings::IsGPULevelHigh(); cmdbuf.BeginQuery(query_pool, static_cast(query_index), use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); @@ -163,8 +164,8 @@ public: if (!has_started) { return; } - scheduler.Record([query_pool = current_query_pool, - query_index = current_bank_slot](vk::CommandBuffer cmdbuf) { + scheduler.Record([query_pool = current_query_pool, query_index = current_bank_slot]( + vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.EndQuery(query_pool, static_cast(query_index)); }); has_started = false; @@ -227,8 +228,8 @@ public: auto& resolve_buffer = buffers[resolve_buffer_index]; VkQueryPool query_pool = bank->GetInnerPool(); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([start, amount, base_offset, query_pool, - buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { + scheduler.Record([start, amount, base_offset, query_pool, buffer = *resolve_buffer]( + vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkBufferMemoryBarrier copy_query_pool_barrier{ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .pNext = nullptr, @@ -291,9 +292,10 @@ public: } else { scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf) { - cmdbuf.FillBuffer(buffer, 0, 8, 0); - }); + scheduler.Record( + [buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.FillBuffer(buffer, 0, 8, 0); + }); } ReplicateCurrentQueryIfNeeded(); @@ -608,7 +610,7 @@ public: void Sync(StagingBufferRef& stagging_buffer, size_t extra_offset, size_t start, size_t size) { scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([this, dst_buffer = stagging_buffer.buffer, extra_offset, start, - size](vk::CommandBuffer cmdbuf) { + size](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { std::array copy{VkBufferCopy{ .srcOffset = start * QUERY_SIZE, .dstOffset = extra_offset, @@ -793,7 +795,7 @@ public: .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([](vk::CommandBuffer cmdbuf) { + scheduler.Record([](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); }); @@ -841,13 +843,14 @@ private: } has_flushed_end_pending = true; if (!has_started || buffers_count == 0) { - scheduler.Record([](vk::CommandBuffer cmdbuf) { + scheduler.Record([](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); UpdateBuffers(); return; } - scheduler.Record([this, total = static_cast(buffers_count)](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, total = static_cast(buffers_count)](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { cmdbuf.BeginTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); }); UpdateBuffers(); @@ -861,12 +864,12 @@ private: has_flushed_end_pending = false; if (buffers_count == 0) { - scheduler.Record([](vk::CommandBuffer cmdbuf) { + scheduler.Record([](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } else { - scheduler.Record([this, - total = static_cast(buffers_count)](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, total = static_cast(buffers_count)]( + vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); }); } @@ -918,7 +921,7 @@ private: scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_buffer = current_bank->GetBuffer(), src_buffer = counter_buffers[stream], src_offset = offsets[stream], - slot](vk::CommandBuffer cmdbuf) { + slot](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); std::array copy{VkBufferCopy{ @@ -1253,8 +1256,9 @@ void QueryCacheRuntime::PauseHostConditionalRendering() { return; } if (impl->is_hcr_running) { - impl->scheduler.Record( - [](vk::CommandBuffer cmdbuf) { cmdbuf.EndConditionalRenderingEXT(); }); + impl->scheduler.Record([](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.EndConditionalRenderingEXT(); + }); } impl->is_hcr_running = false; } @@ -1264,9 +1268,10 @@ void QueryCacheRuntime::ResumeHostConditionalRendering() { return; } if (!impl->is_hcr_running) { - impl->scheduler.Record([hcr_setup = impl->hcr_setup](vk::CommandBuffer cmdbuf) { - cmdbuf.BeginConditionalRenderingEXT(hcr_setup); - }); + impl->scheduler.Record( + [hcr_setup = impl->hcr_setup](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.BeginConditionalRenderingEXT(hcr_setup); + }); } impl->is_hcr_running = true; } @@ -1436,12 +1441,12 @@ void QueryCacheRuntime::Barriers(bool is_prebarrier) { .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; if (is_prebarrier) { - impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { + impl->scheduler.Record([](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); }); } else { - impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { + impl->scheduler.Record([](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); }); @@ -1536,13 +1541,14 @@ void QueryCacheRuntime::SyncValues(std::span values, VkBuffer ba } impl->scheduler.RequestOutsideRenderPassOperationContext(); - impl->scheduler.Record([src_buffer, dst_buffers = std::move(impl->buffers_to_upload_to), - vk_copies = std::move(impl->copies_setup)](vk::CommandBuffer cmdbuf) { - size_t size = dst_buffers.size(); - for (size_t i = 0; i < size; i++) { - cmdbuf.CopyBuffer(src_buffer, dst_buffers[i].first, vk_copies[i]); - } - }); + impl->scheduler.Record( + [src_buffer, dst_buffers = std::move(impl->buffers_to_upload_to), + vk_copies = std::move(impl->copies_setup)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + size_t size = dst_buffers.size(); + for (size_t i = 0; i < size; i++) { + cmdbuf.CopyBuffer(src_buffer, dst_buffers[i].first, vk_copies[i]); + } + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3fe6c95d7..3cdd2721d 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -216,7 +216,7 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); const u32 num_instances{instance_count}; const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)}; - scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { if (draw_params.is_indexed) { cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, draw_params.first_index, draw_params.base_vertex, @@ -238,7 +238,7 @@ void RasterizerVulkan::DrawIndirect() { const auto& offset = indirect_buffer.second; if (params.is_byte_count) { scheduler.Record([buffer_obj = buffer->Handle(), offset, - stride = params.stride](vk::CommandBuffer cmdbuf) { + stride = params.stride](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0, static_cast(stride)); }); @@ -250,7 +250,7 @@ void RasterizerVulkan::DrawIndirect() { const auto& offset_base = count.second; scheduler.Record([draw_buffer_obj = draw_buffer->Handle(), buffer_obj = buffer->Handle(), offset_base, offset, - params](vk::CommandBuffer cmdbuf) { + params](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { if (params.is_indexed) { cmdbuf.DrawIndexedIndirectCount( buffer_obj, offset, draw_buffer_obj, offset_base, @@ -263,7 +263,8 @@ void RasterizerVulkan::DrawIndirect() { }); return; } - scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { if (params.is_indexed) { cmdbuf.DrawIndexedIndirect(buffer_obj, offset, static_cast(params.max_draw_counts), @@ -387,7 +388,8 @@ void RasterizerVulkan::Clear(u32 layer_count) { if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B && regs.clear_surface.A) { - scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { + scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { const VkClearAttachment attachment{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .colorAttachment = color_attachment, @@ -422,8 +424,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { return; } - if (use_stencil && framebuffer->HasAspectStencilBit() && regs.stencil_front_mask != 0xFF && - regs.stencil_front_mask != 0) { + if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) { Region2D dst_region = { Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, Offset2D{.x = clear_rect.rect.offset.x + static_cast(clear_rect.rect.extent.width), @@ -434,7 +435,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { regs.stencil_front_func_mask, dst_region); } else { scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, - clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { + clear_rect, aspect_flags](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { VkClearAttachment attachment; attachment.aspectMask = aspect_flags; attachment.colorAttachment = 0; @@ -466,14 +467,16 @@ void RasterizerVulkan::DispatchCompute() { buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([indirect_buffer = buffer->Handle(), - indirect_offset = offset](vk::CommandBuffer cmdbuf) { + indirect_offset = offset](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset); }); return; } const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); + scheduler.Record([dim](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.Dispatch(dim[0], dim[1], dim[2]); + }); } void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { @@ -691,7 +694,7 @@ void RasterizerVulkan::WaitForIdle() { query_cache.NotifyWFI(); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { + scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetEvent(event, flags); cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); }); @@ -949,7 +952,9 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg .minDepth = 0.0f, .maxDepth = 1.0f, }; - scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewport); }); + scheduler.Record([viewport](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetViewport(0, viewport); + }); return; } const bool is_rescaling{texture_cache.IsRescaling()}; @@ -964,7 +969,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), }; - scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const u32 num_viewports = std::min(device.GetMaxViewports(), Maxwell::NumViewports); const vk::Span viewports(viewport_list.data(), num_viewports); cmdbuf.SetViewport(0, viewports); @@ -999,7 +1004,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs GetScissorState(regs, 14, up_scale, down_shift), GetScissorState(regs, 15, up_scale, down_shift), }; - scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const u32 num_scissors = std::min(device.GetMaxViewports(), Maxwell::NumViewports); const vk::Span scissors(scissor_list.data(), num_scissors); cmdbuf.SetScissor(0, scissors); @@ -1032,22 +1037,23 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { units = static_cast(static_cast(units) * rescale_factor); return false; })(); - scheduler.Record([constant = units, clamp = regs.depth_bias_clamp, - factor = regs.slope_scale_depth_bias, force_unorm, - precise = device.HasExactDepthBiasControl()](vk::CommandBuffer cmdbuf) { - if (force_unorm) { - VkDepthBiasRepresentationInfoEXT info{ - .sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT, - .pNext = nullptr, - .depthBiasRepresentation = - VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT, - .depthBiasExact = precise ? VK_TRUE : VK_FALSE, - }; - cmdbuf.SetDepthBias(constant, clamp, factor, &info); - return; - } - cmdbuf.SetDepthBias(constant, clamp, factor); - }); + scheduler.Record( + [constant = units, clamp = regs.depth_bias_clamp, factor = regs.slope_scale_depth_bias, + force_unorm, + precise = device.HasExactDepthBiasControl()](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + if (force_unorm) { + VkDepthBiasRepresentationInfoEXT info{ + .sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT, + .pNext = nullptr, + .depthBiasRepresentation = + VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT, + .depthBiasExact = precise ? VK_TRUE : VK_FALSE, + }; + cmdbuf.SetDepthBias(constant, clamp, factor, &info); + return; + } + cmdbuf.SetDepthBias(constant, clamp, factor); + }); } void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1056,16 +1062,19 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg } const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a}; - scheduler.Record( - [blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); }); + scheduler.Record([blend_color](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetBlendConstants(blend_color.data()); + }); } void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchDepthBounds()) { return; } - scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( - vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); }); + scheduler.Record([min = regs.depth_bounds[0], + max = regs.depth_bounds[1]](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetDepthBounds(min, max); + }); } void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1093,7 +1102,8 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref, - two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { const bool set_back = two_sided && front_ref != back_ref; // Front face cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT @@ -1119,7 +1129,8 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } scheduler.Record([front_write_mask = regs.stencil_front_mask, back_write_mask = regs.stencil_back_mask, - two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { const bool set_back = two_sided && front_write_mask != back_write_mask; // Front face cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT @@ -1145,7 +1156,8 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } scheduler.Record([front_test_mask = regs.stencil_front_func_mask, back_test_mask = regs.stencil_back_func_mask, - two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) { + two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { const bool set_back = two_sided && front_test_mask != back_test_mask; // Front face cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT @@ -1166,7 +1178,8 @@ void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { } const float width = regs.line_anti_alias_enable ? regs.line_width_smooth : regs.line_width_aliased; - scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); + scheduler.Record( + [width](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetLineWidth(width); }); } void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1174,7 +1187,7 @@ void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { return; } scheduler.Record([enabled = regs.gl_cull_test_enabled, - cull_face = regs.gl_cull_face](vk::CommandBuffer cmdbuf) { + cull_face = regs.gl_cull_face](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetCullModeEXT(enabled ? MaxwellToVK::CullFace(cull_face) : VK_CULL_MODE_NONE); }); } @@ -1188,7 +1201,7 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); enabled = false; } - scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) { + scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); } @@ -1197,36 +1210,40 @@ void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& re if (!state_tracker.TouchDepthTestEnable()) { return; } - scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) { - cmdbuf.SetDepthTestEnableEXT(enable); - }); + scheduler.Record( + [enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetDepthTestEnableEXT(enable); + }); } void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchDepthWriteEnable()) { return; } - scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) { - cmdbuf.SetDepthWriteEnableEXT(enable); - }); + scheduler.Record( + [enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetDepthWriteEnableEXT(enable); + }); } void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchPrimitiveRestartEnable()) { return; } - scheduler.Record([enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) { - cmdbuf.SetPrimitiveRestartEnableEXT(enable); - }); + scheduler.Record( + [enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetPrimitiveRestartEnableEXT(enable); + }); } void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchRasterizerDiscardEnable()) { return; } - scheduler.Record([disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) { - cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0); - }); + scheduler.Record( + [disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0); + }); } void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1260,15 +1277,16 @@ void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& re }; const u32 topology_index = static_cast(maxwell3d->draw_manager->GetDrawState().topology); const u32 enable = enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]]; - scheduler.Record( - [enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBiasEnableEXT(enable != 0); }); + scheduler.Record([enable](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetDepthBiasEnableEXT(enable != 0); + }); } void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchLogicOpEnable()) { return; } - scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) { + scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetLogicOpEnableEXT(enable != 0); }); } @@ -1283,15 +1301,16 @@ void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& r Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ || regs.viewport_clip_control.geometry_clip == Maxwell::ViewportClipControl::GeometryClip::FrustumZ); - scheduler.Record( - [is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); }); + scheduler.Record([is_enabled](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetDepthClampEnableEXT(is_enabled); + }); } void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchDepthCompareOp()) { return; } - scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf) { + scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(func)); }); } @@ -1306,8 +1325,9 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { front_face = front_face == VK_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE; } - scheduler.Record( - [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); }); + scheduler.Record([front_face](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetFrontFaceEXT(front_face); + }); } void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1325,7 +1345,7 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { const Maxwell::StencilOp::Op back_zpass = regs.stencil_back_op.zpass; const Maxwell::ComparisonOp back_compare = regs.stencil_back_op.func; scheduler.Record([fail, zfail, zpass, compare, back_fail, back_zfail, back_zpass, - back_compare](vk::CommandBuffer cmdbuf) { + back_compare](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_BIT, MaxwellToVK::StencilOp(fail), MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), MaxwellToVK::ComparisonOp(compare)); @@ -1336,11 +1356,12 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { }); } else { // Front face defines the stencil op of both faces - scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) { - cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail), - MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), - MaxwellToVK::ComparisonOp(compare)); - }); + scheduler.Record( + [fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail), + MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), + MaxwellToVK::ComparisonOp(compare)); + }); } } @@ -1351,7 +1372,8 @@ void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) { const auto op_value = static_cast(regs.logic_op.op); auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast(op_value - 0x1500) : VK_LOGIC_OP_NO_OP; - scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); }); + scheduler.Record( + [op](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetLogicOpEXT(op); }); } void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1377,7 +1399,7 @@ void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) { current |= VK_COLOR_COMPONENT_A_BIT; } } - scheduler.Record([setup_masks](vk::CommandBuffer cmdbuf) { + scheduler.Record([setup_masks](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetColorWriteMaskEXT(0, setup_masks); }); } @@ -1387,7 +1409,7 @@ void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) { std::ranges::transform( regs.blend.enable, setup_enables.begin(), [&](const auto& is_enabled) { return is_enabled != 0 ? VK_TRUE : VK_FALSE; }); - scheduler.Record([setup_enables](vk::CommandBuffer cmdbuf) { + scheduler.Record([setup_enables](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetColorBlendEnableEXT(0, setup_enables); }); } @@ -1410,7 +1432,7 @@ void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) { } blend_setup(regs.blend_per_target[index]); } - scheduler.Record([setup_blends](vk::CommandBuffer cmdbuf) { + scheduler.Record([setup_blends](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetColorBlendEquationEXT(0, setup_blends); }); } @@ -1420,7 +1442,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& if (!state_tracker.TouchStencilTestEnable()) { return; } - scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) { + scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetStencilTestEnableEXT(enable); }); } @@ -1478,7 +1500,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) .divisor = is_instanced ? input_binding.frequency : 1, }); } - scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { + scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { cmdbuf.SetVertexInputEXT(bindings, attributes); }); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 3b7aaf006..54ab47751 100755 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -22,11 +22,12 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_WaitForWorker); -void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { +void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, + vk::CommandBuffer upload_cmdbuf) { auto command = first; while (command != nullptr) { auto next = command->GetNext(); - command->Execute(cmdbuf); + command->Execute(cmdbuf, upload_cmdbuf); command->~Command(); command = next; } @@ -102,22 +103,23 @@ void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) { state.framebuffer = framebuffer_handle; state.render_area = render_area; - Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) { - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer_handle, - .renderArea = - { - .offset = {.x = 0, .y = 0}, - .extent = render_area, - }, - .clearValueCount = 0, - .pClearValues = nullptr, - }; - cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - }); + Record( + [renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer_handle, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); + }); num_renderpass_images = framebuffer->NumImages(); renderpass_images = framebuffer->Images(); renderpass_image_ranges = framebuffer->ImageRanges(); @@ -180,7 +182,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) { // Perform the work, tracking whether the chunk was a submission // before executing. const bool has_submit = work->HasSubmit(); - work->ExecuteAll(current_cmdbuf); + work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf); // If the chunk was a submission, reallocate the command buffer. if (has_submit) { @@ -205,6 +207,13 @@ void Scheduler::AllocateWorkerCommandBuffer() { .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, .pInheritanceInfo = nullptr, }); + current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); + current_upload_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); } u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { @@ -212,7 +221,9 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se InvalidateState(); const u64 signal_value = master_semaphore->NextTick(); - Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf, + vk::CommandBuffer upload_cmdbuf) { + upload_cmdbuf.End(); cmdbuf.End(); if (on_submit) { @@ -221,7 +232,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se std::scoped_lock lock{submit_mutex}; switch (const VkResult result = master_semaphore->SubmitQueue( - cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { + cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { case VK_SUCCESS: break; case VK_ERROR_DEVICE_LOST: @@ -275,7 +286,7 @@ void Scheduler::EndRenderPass() { return; } Record([num_images = num_renderpass_images, images = renderpass_images, - ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { + ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { std::array barriers; for (size_t i = 0; i < num_images; ++i) { barriers[i] = VkImageMemoryBarrier{ diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 04692dbdf..ab0ec3e9f 100755 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -119,7 +119,7 @@ private: public: virtual ~Command() = default; - virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; + virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0; Command* GetNext() const { return next; @@ -142,8 +142,8 @@ private: TypedCommand(TypedCommand&&) = delete; TypedCommand& operator=(TypedCommand&&) = delete; - void Execute(vk::CommandBuffer cmdbuf) const override { - command(cmdbuf); + void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override { + command(cmdbuf, upload_cmdbuf); } private: @@ -152,7 +152,7 @@ private: class CommandChunk final { public: - void ExecuteAll(vk::CommandBuffer cmdbuf); + void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf); template bool Record(T& command) { @@ -228,6 +228,7 @@ private: VideoCommon::QueryCacheBase* query_cache = nullptr; vk::CommandBuffer current_cmdbuf; + vk::CommandBuffer current_upload_cmdbuf; std::unique_ptr chunk; std::function on_submit; diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index 5efd7d66e..5870a2b25 100755 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp @@ -103,7 +103,7 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc }}}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([&](vk::CommandBuffer cmdbuf) { + scheduler.Record([&](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { TransitionImageLayout(cmdbuf, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_UNDEFINED); cmdbuf.CopyBufferToImage(*upload_buffer, *image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, @@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) { UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); - scheduler.Record([&](vk::CommandBuffer& cmdbuf) { + scheduler.Record([&](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { for (auto& images : m_dynamic_images) { for (size_t i = 0; i < MaxDynamicImage; i++) { ClearColorImage(cmdbuf, *images.images[i]); @@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ UpdateDescriptorSets(source_image_view, image_index); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) { + scheduler.Record([=, this](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index b4eedb966..087d7bb75 100755 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -36,6 +36,10 @@ public: StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); void FreeDeferred(StagingBufferRef& ref); + [[nodiscard]] VkBuffer StreamBuf() const noexcept { + return *stream_buffer; + } + void TickFrame(); private: diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index aedf1687d..bb5aed939 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -709,7 +709,7 @@ void BlitScale(Scheduler& scheduler, VkImage src_image, VkImage dst_image, const scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d, - vk_filter, up_scaling](vk::CommandBuffer cmdbuf) { + vk_filter, up_scaling](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkOffset2D src_size{ .x = static_cast(up_scaling ? extent.width : resolution.ScaleUp(extent.width)), .y = static_cast(is_2d && up_scaling ? extent.height @@ -955,7 +955,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, const VkImage src_image = src.Handle(); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, - vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { + vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { RangedBarrierRange dst_range; RangedBarrierRange src_range; for (const VkBufferImageCopy& copy : vk_in_copies) { @@ -1104,7 +1104,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst const bool is_resolve = is_src_msaa && !is_dst_msaa; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers, - aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) { + aspect_mask, is_resolve](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const std::array read_barriers{ VkImageMemoryBarrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, @@ -1240,7 +1240,8 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, const VkImage dst_image = dst.Handle(); const VkImage src_image = src.Handle(); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { + scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf, + vk::CommandBuffer) { RangedBarrierRange dst_range; RangedBarrierRange src_range; for (const VkImageCopy& copy : vk_copies) { @@ -1402,7 +1403,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, const VkImageAspectFlags vk_aspect_mask = aspect_mask; const bool is_initialized = std::exchange(initialized, true); scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, - vk_copies](vk::CommandBuffer cmdbuf) { + vk_copies](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); }); if (is_rescaled) { @@ -1441,7 +1442,8 @@ void Image::DownloadMemory(std::span buffers_span, std::spanRequestOutsideRenderPassOperationContext(); scheduler->Record([buffers = std::move(buffers_vector), image = *original_image, - aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { + aspect_mask_ = aspect_mask, + vk_copies](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index bdc8403ff..7a8714001 100755 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -138,6 +138,10 @@ public: return Iterator(this, SlotId{SlotId::INVALID_INDEX}); } + [[nodiscard]] size_t size() const noexcept { + return values_capacity - free_list.size(); + } + private: struct NonTrivialDummy { NonTrivialDummy() noexcept {} diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index cb2faa446..56982f17b 100755 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -1098,6 +1098,10 @@ public: return &handle; } + VkCommandBuffer operator*() const noexcept { + return handle; + } + void Begin(const VkCommandBufferBeginInfo& begin_info) const { Check(dld->vkBeginCommandBuffer(handle, &begin_info)); } diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 291dc8ab9..4c334be53 100755 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -61,6 +61,8 @@ void ConfigureDebug::SetConfiguration() { ui->enable_all_controllers->setChecked(Settings::values.enable_all_controllers.GetValue()); ui->enable_renderdoc_hotkey->setEnabled(runtime_lock); ui->enable_renderdoc_hotkey->setChecked(Settings::values.enable_renderdoc_hotkey.GetValue()); + ui->disable_buffer_reorder->setEnabled(runtime_lock); + ui->disable_buffer_reorder->setChecked(Settings::values.disable_buffer_reorder.GetValue()); ui->enable_graphics_debugging->setEnabled(runtime_lock); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_shader_feedback->setEnabled(runtime_lock); @@ -114,6 +116,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.enable_all_controllers = ui->enable_all_controllers->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.enable_renderdoc_hotkey = ui->enable_renderdoc_hotkey->isChecked(); + Settings::values.disable_buffer_reorder = ui->disable_buffer_reorder->isChecked(); Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 40736c277..2e73951e0 100755 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -271,19 +271,6 @@ - - - true - - - When checked, it disables the macro HLE functions. Enabling this makes games run slower - - - Disable Macro HLE - - - - true @@ -306,59 +293,7 @@ - - - - When checked, yuzu will log statistics about the compiled pipeline cache - - - Enable Shader Feedback - - - - - - true - - - When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower - - - Disable Macro JIT - - - - - - - Qt::Vertical - - - QSizePolicy::Preferred - - - - 20 - 0 - - - - - - - - true - - - When checked, the graphics API enters a slower debugging mode - - - Enable Graphics Debugging - - - - true @@ -378,6 +313,81 @@ + + + + true + + + When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower + + + Disable Macro JIT + + + + + + + true + + + When checked, it disables the macro HLE functions. Enabling this makes games run slower + + + Disable Macro HLE + + + + + + + true + + + When checked, the graphics API enters a slower debugging mode + + + Enable Graphics Debugging + + + + + + + Qt::Vertical + + + QSizePolicy::Preferred + + + + 20 + 0 + + + + + + + + When checked, yuzu will log statistics about the compiled pipeline cache + + + Enable Shader Feedback + + + + + + + <html><head/><body><p>When checked, disables reording of mapped memory uploads which allows to associate uploads with specific draws. May reduce performance in some cases.</p></body></html> + + + Disable Buffer Reorder + + + diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 5444a3925..565bb6dd9 100755 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1556,6 +1556,10 @@ void GMainWindow::ConnectMenuEvents() { // Tools connect_menu(ui->action_Rederive, std::bind(&GMainWindow::OnReinitializeKeys, this, ReinitializeKeyBehavior::Warning)); + connect_menu(ui->action_Load_Cabinet_Nickname_Owner, &GMainWindow::OnCabinetNicknameAndOwner); + connect_menu(ui->action_Load_Cabinet_Eraser, &GMainWindow::OnCabinetEraser); + connect_menu(ui->action_Load_Cabinet_Restorer, &GMainWindow::OnCabinetRestorer); + connect_menu(ui->action_Load_Cabinet_Formatter, &GMainWindow::OnCabinetFormatter); connect_menu(ui->action_Load_Mii_Edit, &GMainWindow::OnMiiEdit); connect_menu(ui->action_Capture_Screenshot, &GMainWindow::OnCaptureScreenshot); @@ -1573,6 +1577,7 @@ void GMainWindow::ConnectMenuEvents() { void GMainWindow::UpdateMenuState() { const bool is_paused = emu_thread == nullptr || !emu_thread->IsRunning(); + const bool is_firmware_available = CheckFirmwarePresence(); const std::array running_actions{ ui->action_Stop, @@ -1583,10 +1588,22 @@ void GMainWindow::UpdateMenuState() { ui->action_Pause, }; + const std::array applet_actions{ + ui->action_Load_Cabinet_Nickname_Owner, + ui->action_Load_Cabinet_Eraser, + ui->action_Load_Cabinet_Restorer, + ui->action_Load_Cabinet_Formatter, + ui->action_Load_Mii_Edit, + }; + for (QAction* action : running_actions) { action->setEnabled(emulation_running); } + for (QAction* action : applet_actions) { + action->setEnabled(is_firmware_available); + } + ui->action_Capture_Screenshot->setEnabled(emulation_running && !is_paused); if (emulation_running && is_paused) { @@ -1596,8 +1613,6 @@ void GMainWindow::UpdateMenuState() { } multiplayer_state->UpdateNotificationStatus(); - - ui->action_Load_Mii_Edit->setEnabled(CheckFirmwarePresence()); } void GMainWindow::OnDisplayTitleBars(bool show) { @@ -2108,6 +2123,8 @@ void GMainWindow::OnEmulationStopped() { OnTasStateChanged(); render_window->FinalizeCamera(); + system->GetAppletManager().SetCurrentAppletId(Service::AM::Applets::AppletId::None); + // Enable all controllers system->HIDCore().SetSupportedStyleTag({Core::HID::NpadStyleSet::All}); @@ -4157,6 +4174,46 @@ void GMainWindow::OnToggleStatusBar() { statusBar()->setVisible(ui->action_Show_Status_Bar->isChecked()); } +void GMainWindow::OnCabinetNicknameAndOwner() { + OnCabinet(Service::NFP::CabinetMode::StartNicknameAndOwnerSettings); +} + +void GMainWindow::OnCabinetEraser() { + OnCabinet(Service::NFP::CabinetMode::StartGameDataEraser); +} + +void GMainWindow::OnCabinetRestorer() { + OnCabinet(Service::NFP::CabinetMode::StartRestorer); +} + +void GMainWindow::OnCabinetFormatter() { + OnCabinet(Service::NFP::CabinetMode::StartFormatter); +} + +void GMainWindow::OnCabinet(Service::NFP::CabinetMode mode) { + constexpr u64 CabinetId = 0x0100000000001002ull; + auto bis_system = system->GetFileSystemController().GetSystemNANDContents(); + if (!bis_system) { + QMessageBox::warning(this, tr("No firmware available"), + tr("Please install the firmware to use the Amiibo applet.")); + return; + } + + auto cabinet_nca = bis_system->GetEntry(CabinetId, FileSys::ContentRecordType::Program); + if (!cabinet_nca) { + QMessageBox::warning(this, tr("Cabinet Applet"), + tr("Amiibo applet is not available. Please reinstall firmware.")); + return; + } + + system->GetAppletManager().SetCurrentAppletId(Service::AM::Applets::AppletId::Cabinet); + system->GetAppletManager().SetCabinetMode(mode); + + QString filename = QString::fromStdString((cabinet_nca->GetFullPath())); + UISettings::values.roms_path = QFileInfo(filename).path(); + BootGame(filename); +} + void GMainWindow::OnMiiEdit() { constexpr u64 MiiEditId = 0x0100000000001009ull; auto bis_system = system->GetFileSystemController().GetSystemNANDContents(); @@ -4173,6 +4230,8 @@ void GMainWindow::OnMiiEdit() { return; } + system->GetAppletManager().SetCurrentAppletId(Service::AM::Applets::AppletId::MiiEdit); + const auto filename = QString::fromStdString((mii_applet_nca->GetFullPath())); UISettings::values.roms_path = QFileInfo(filename).path(); BootGame(filename); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index f2bfdfd44..0c49c5e0c 100755 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -106,6 +106,10 @@ namespace Service::NFC { class NfcDevice; } // namespace Service::NFC +namespace Service::NFP { +enum class CabinetMode : u8; +} // namespace Service::NFP + namespace Ui { class MainWindow; } @@ -370,6 +374,11 @@ private slots: void ResetWindowSize720(); void ResetWindowSize900(); void ResetWindowSize1080(); + void OnCabinetNicknameAndOwner(); + void OnCabinetEraser(); + void OnCabinetRestorer(); + void OnCabinetFormatter(); + void OnCabinet(Service::NFP::CabinetMode mode); void OnMiiEdit(); void OnCaptureScreenshot(); void OnReinitializeKeys(ReinitializeKeyBehavior behavior); diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index ef0c016c9..b276f4cea 100755 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -137,6 +137,15 @@ &Tools + + + &Amiibo + + + + + + &TAS @@ -150,6 +159,7 @@ + @@ -370,6 +380,26 @@ &Capture Screenshot + + + &Set Nickname and Owner + + + + + &Delete Game Data + + + + + &Restore Amiibo + + + + + &Format Amiibo + + Open &Mii Editor