early-access version 3913

This commit is contained in:
pineappleEA 2023-10-07 20:35:47 +02:00
parent 729208f9b5
commit b5cdabcc51
13 changed files with 103 additions and 72 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 3912. This is the source code for early-access 3913.
## Legal Notice ## Legal Notice

View file

@ -218,7 +218,6 @@ public:
return; return;
} }
m_window->OnSurfaceChanged(m_native_window); m_window->OnSurfaceChanged(m_native_window);
m_system.Renderer().NotifySurfaceChanged();
} }
void ConfigureFilesystemProvider(const std::string& filepath) { void ConfigureFilesystemProvider(const std::string& filepath) {

View file

@ -89,9 +89,6 @@ public:
void RequestScreenshot(void* data, std::function<void(bool)> callback, void RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout); const Layout::FramebufferLayout& layout);
/// This is called to notify the rendering backend of a surface change
virtual void NotifySurfaceChanged() {}
protected: protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<Core::Frontend::GraphicsContext> context; std::unique_ptr<Core::Frontend::GraphicsContext> context;

View file

@ -618,8 +618,9 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool
const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key); const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key);
const VkPipelineLayout layout = *clear_color_pipeline_layout; const VkPipelineLayout layout = *clear_color_pipeline_layout;
scheduler.RequestRenderpass(dst_framebuffer); scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record( scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
[pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { constexpr std::array blend_constants{0.0f, 0.0f, 0.0f, 0.0f};
cmdbuf.SetBlendConstants(blend_constants.data());
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
BindBlitState(cmdbuf, dst_region); BindBlitState(cmdbuf, dst_region);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth);
@ -877,7 +878,7 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline(
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.depthTestEnable = VK_FALSE, .depthTestEnable = key.depth_clear,
.depthWriteEnable = key.depth_clear, .depthWriteEnable = key.depth_clear,
.depthCompareOp = VK_COMPARE_OP_ALWAYS, .depthCompareOp = VK_COMPARE_OP_ALWAYS,
.depthBoundsTestEnable = VK_FALSE, .depthBoundsTestEnable = VK_FALSE,

View file

@ -56,10 +56,6 @@ public:
return device.GetDriverName(); return device.GetDriverName();
} }
void NotifySurfaceChanged() override {
present_manager.NotifySurfaceChanged();
}
private: private:
void Report() const; void Report() const;

View file

@ -103,8 +103,7 @@ PresentManager::PresentManager(const vk::Instance& instance_,
surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(), surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(),
swapchain.GetImageViewFormat())}, swapchain.GetImageViewFormat())},
use_present_thread{Settings::values.async_presentation.GetValue()}, use_present_thread{Settings::values.async_presentation.GetValue()},
image_count{swapchain.GetImageCount()}, last_render_surface{ image_count{swapchain.GetImageCount()} {
render_window_.GetWindowInfo().render_surface} {
auto& dld = device.GetLogical(); auto& dld = device.GetLogical();
cmdpool = dld.CreateCommandPool({ cmdpool = dld.CreateCommandPool({
@ -289,44 +288,36 @@ void PresentManager::PresentThread(std::stop_token token) {
} }
} }
void PresentManager::NotifySurfaceChanged() { void PresentManager::RecreateSwapchain(Frame* frame) {
#ifdef ANDROID swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb);
std::scoped_lock lock{recreate_surface_mutex}; image_count = swapchain.GetImageCount();
recreate_surface_cv.notify_one();
#endif
} }
void PresentManager::CopyToSwapchain(Frame* frame) { void PresentManager::CopyToSwapchain(Frame* frame) {
MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); bool requires_recreation = false;
const auto recreate_swapchain = [&] { while (true) {
swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); try {
image_count = swapchain.GetImageCount(); // Recreate surface and swapchain if needed.
}; if (requires_recreation) {
#ifdef ANDROID
std::unique_lock lock{recreate_surface_mutex};
const auto needs_recreation = [&] {
if (last_render_surface != render_window.GetWindowInfo().render_surface) {
return true;
}
if (swapchain.NeedsRecreation(frame->is_srgb)) {
return true;
}
return false;
};
recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400),
[&]() { return !needs_recreation(); });
// If the frontend recreated the surface, recreate the renderer surface and swapchain.
if (last_render_surface != render_window.GetWindowInfo().render_surface) {
last_render_surface = render_window.GetWindowInfo().render_surface;
surface = CreateSurface(instance, render_window.GetWindowInfo()); surface = CreateSurface(instance, render_window.GetWindowInfo());
recreate_swapchain(); RecreateSwapchain(frame);
} }
#endif
// Draw to swapchain.
return CopyToSwapchainImpl(frame);
} catch (const vk::Exception& except) {
if (except.GetResult() != VK_ERROR_SURFACE_LOST_KHR) {
throw;
}
requires_recreation = true;
}
}
}
void PresentManager::CopyToSwapchainImpl(Frame* frame) {
MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
// If the size or colorspace of the incoming frames has changed, recreate the swapchain // If the size or colorspace of the incoming frames has changed, recreate the swapchain
// to account for that. // to account for that.
@ -334,11 +325,11 @@ void PresentManager::CopyToSwapchain(Frame* frame) {
const bool size_changed = const bool size_changed =
swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
if (srgb_changed || size_changed) { if (srgb_changed || size_changed) {
recreate_swapchain(); RecreateSwapchain(frame);
} }
while (swapchain.AcquireNextImage()) { while (swapchain.AcquireNextImage()) {
recreate_swapchain(); RecreateSwapchain(frame);
} }
const vk::CommandBuffer cmdbuf{frame->cmdbuf}; const vk::CommandBuffer cmdbuf{frame->cmdbuf};

View file

@ -54,14 +54,15 @@ public:
/// Waits for the present thread to finish presenting all queued frames. /// Waits for the present thread to finish presenting all queued frames.
void WaitPresent(); void WaitPresent();
/// This is called to notify the rendering backend of a surface change
void NotifySurfaceChanged();
private: private:
void PresentThread(std::stop_token token); void PresentThread(std::stop_token token);
void CopyToSwapchain(Frame* frame); void CopyToSwapchain(Frame* frame);
void CopyToSwapchainImpl(Frame* frame);
void RecreateSwapchain(Frame* frame);
private: private:
const vk::Instance& instance; const vk::Instance& instance;
Core::Frontend::EmuWindow& render_window; Core::Frontend::EmuWindow& render_window;
@ -76,16 +77,13 @@ private:
std::queue<Frame*> free_queue; std::queue<Frame*> free_queue;
std::condition_variable_any frame_cv; std::condition_variable_any frame_cv;
std::condition_variable free_cv; std::condition_variable free_cv;
std::condition_variable recreate_surface_cv;
std::mutex swapchain_mutex; std::mutex swapchain_mutex;
std::mutex recreate_surface_mutex;
std::mutex queue_mutex; std::mutex queue_mutex;
std::mutex free_mutex; std::mutex free_mutex;
std::jthread present_thread; std::jthread present_thread;
bool blit_supported; bool blit_supported;
bool use_present_thread; bool use_present_thread;
std::size_t image_count{}; std::size_t image_count{};
void* last_render_surface{};
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -424,7 +424,8 @@ void RasterizerVulkan::Clear(u32 layer_count) {
return; return;
} }
if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) { if (use_stencil && framebuffer->HasAspectStencilBit() && regs.stencil_front_mask != 0xFF &&
regs.stencil_front_mask != 0) {
Region2D dst_region = { Region2D dst_region = {
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width), Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width),

View file

@ -24,25 +24,38 @@ using namespace Common::Literals;
// Maximum potential alignment of a Vulkan buffer // Maximum potential alignment of a Vulkan buffer
constexpr VkDeviceSize MAX_ALIGNMENT = 256; constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Maximum size to put elements in the stream buffer
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
// Stream buffer size in bytes // Stream buffer size in bytes
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
size_t Region(size_t iterator) noexcept { size_t GetStreamBufferSize(const Device& device) {
return iterator / REGION_SIZE; VkDeviceSize size{0};
if (device.HasDebuggingToolAttached()) {
ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) {
size = std::max(size, heap.size);
});
// If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
// loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
// as the heap will be much larger.
if (size <= 256_MiB) {
size = size * 40 / 100;
}
} else {
size = MAX_STREAM_BUFFER_SIZE;
}
return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
} }
} // Anonymous namespace } // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_) Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size /
StagingBufferPool::NUM_SYNCS} {
VkBufferCreateInfo stream_ci = { VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.size = STREAM_BUFFER_SIZE, .size = stream_buffer_size,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE, .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
StagingBufferPool::~StagingBufferPool() = default; StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
return GetStreamBuffer(size); return GetStreamBuffer(size);
} }
return GetStagingBuffer(size, usage, deferred); return GetStagingBuffer(size, usage, deferred);
@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
used_iterator = iterator; used_iterator = iterator;
free_iterator = std::max(free_iterator, iterator + size); free_iterator = std::max(free_iterator, iterator + size);
if (iterator + size >= STREAM_BUFFER_SIZE) { if (iterator + size >= stream_buffer_size) {
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
current_tick); current_tick);
used_iterator = 0; used_iterator = 0;

View file

@ -94,6 +94,9 @@ private:
void ReleaseCache(MemoryUsage usage); void ReleaseCache(MemoryUsage usage);
void ReleaseLevel(StagingBuffersCache& cache, size_t log2); void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
size_t Region(size_t iter) const noexcept {
return iter / region_size;
}
const Device& device; const Device& device;
MemoryAllocator& memory_allocator; MemoryAllocator& memory_allocator;
@ -101,6 +104,8 @@ private:
vk::Buffer stream_buffer; vk::Buffer stream_buffer;
std::span<u8> stream_pointer; std::span<u8> stream_pointer;
VkDeviceSize stream_buffer_size;
VkDeviceSize region_size;
size_t iterator = 0; size_t iterator = 0;
size_t used_iterator = 0; size_t used_iterator = 0;

View file

@ -9,6 +9,7 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/literals.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/polyfill_ranges.h" #include "common/polyfill_ranges.h"
#include "video_core/vulkan_common/vma.h" #include "video_core/vulkan_common/vma.h"
@ -69,8 +70,7 @@ struct Range {
case MemoryUsage::Download: case MemoryUsage::Download:
return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
case MemoryUsage::DeviceLocal: case MemoryUsage::DeviceLocal:
return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | return {};
VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
} }
return {}; return {};
} }
@ -212,7 +212,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_)
: device{device_}, allocator{device.GetAllocator()}, : device{device_}, allocator{device.GetAllocator()},
properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
buffer_image_granularity{ buffer_image_granularity{
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
// GPUs not supporting rebar may only have a region with less than 256MB host visible/device
// local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
// the heap running out of memory. With RenderDoc attached and only a small host/device region,
// only allow the stream buffer in this memory heap.
if (device.HasDebuggingToolAttached()) {
using namespace Common::Literals;
ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
if (heap.size <= 256_MiB) {
valid_memory_types &= ~(1u << index);
}
});
}
}
MemoryAllocator::~MemoryAllocator() = default; MemoryAllocator::~MemoryAllocator() = default;
@ -244,7 +257,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa
.usage = MemoryUsageVma(usage), .usage = MemoryUsageVma(usage),
.requiredFlags = 0, .requiredFlags = 0,
.preferredFlags = MemoryUsagePreferedVmaFlags(usage), .preferredFlags = MemoryUsagePreferedVmaFlags(usage),
.memoryTypeBits = 0, .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE, .pool = VK_NULL_HANDLE,
.pUserData = nullptr, .pUserData = nullptr,
.priority = 0.f, .priority = 0.f,

View file

@ -7,6 +7,7 @@
#include <span> #include <span>
#include <vector> #include <vector>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h" #include "video_core/vulkan_common/vulkan_wrapper.h"
VK_DEFINE_HANDLE(VmaAllocator) VK_DEFINE_HANDLE(VmaAllocator)
@ -26,6 +27,18 @@ enum class MemoryUsage {
Stream, ///< Requests device local host visible buffer, falling back host memory. Stream, ///< Requests device local host visible buffer, falling back host memory.
}; };
template <typename F>
void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) {
auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties;
for (size_t i = 0; i < memory_props.memoryTypeCount; i++) {
auto& memory_type = memory_props.memoryTypes[i];
if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]);
}
}
}
/// Ownership handle of a memory commitment. /// Ownership handle of a memory commitment.
/// Points to a subregion of a memory allocation. /// Points to a subregion of a memory allocation.
class MemoryCommit { class MemoryCommit {
@ -124,6 +137,7 @@ private:
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
// and optimal images // and optimal images
u32 valid_memory_types{~0u};
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -117,6 +117,9 @@ public:
virtual ~Exception() = default; virtual ~Exception() = default;
const char* what() const noexcept override; const char* what() const noexcept override;
/// Returns the VkResult stored in this exception.
VkResult GetResult() const noexcept { return result; }
private: private:
VkResult result; VkResult result;