early-access version 1936
This commit is contained in:
parent
b6ccebf4de
commit
6ee725cc08
12 changed files with 443 additions and 77 deletions
|
@ -496,7 +496,7 @@ endif()
|
|||
# Ensure libusb is properly configured (based on dolphin libusb include)
|
||||
if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB)
|
||||
include(FindPkgConfig)
|
||||
if (PKG_CONFIG_FOUND)
|
||||
if (PKG_CONFIG_FOUND AND NOT CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD")
|
||||
pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24)
|
||||
else()
|
||||
find_package(LibUSB)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
yuzu emulator early access
|
||||
=============
|
||||
|
||||
This is the source code for early-access 1935.
|
||||
This is the source code for early-access 1936.
|
||||
|
||||
## Legal Notice
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ public:
|
|||
|
||||
[[nodiscard]] IR::Inst* Inst() const;
|
||||
[[nodiscard]] IR::Inst* InstRecursive() const;
|
||||
[[nodiscard]] IR::Inst* TryInstRecursive() const;
|
||||
[[nodiscard]] IR::Value Resolve() const;
|
||||
[[nodiscard]] IR::Reg Reg() const;
|
||||
[[nodiscard]] IR::Pred Pred() const;
|
||||
|
@ -308,6 +309,13 @@ inline IR::Inst* Value::InstRecursive() const {
|
|||
return inst;
|
||||
}
|
||||
|
||||
/// Follows identity links and returns the instruction behind this value, or nullptr when the
/// value reached at the end of the chain is not opaque.
inline IR::Inst* Value::TryInstRecursive() const {
    if (!IsIdentity()) {
        if (type == Type::Opaque) {
            return inst;
        }
        return nullptr;
    }
    // Identities forward their first argument, keep looking there
    return inst->Arg(0).TryInstRecursive();
}
|
||||
|
||||
inline IR::Value Value::Resolve() const {
|
||||
if (IsIdentity()) {
|
||||
return inst->Arg(0).Resolve();
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
|
@ -88,6 +89,26 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Return true when all values in a range are equal
|
||||
template <typename Range>
|
||||
bool AreEqual(const Range& range) {
|
||||
auto resolver{[](const auto& value) { return value.Resolve(); }};
|
||||
auto equal{[](const IR::Value& lhs, const IR::Value& rhs) {
|
||||
if (lhs == rhs) {
|
||||
return true;
|
||||
}
|
||||
// Not equal, but try to match if they read the same constant buffer
|
||||
if (!lhs.IsImmediate() && !rhs.IsImmediate() &&
|
||||
lhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
|
||||
rhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
|
||||
lhs.Inst()->Arg(0) == rhs.Inst()->Arg(0) && lhs.Inst()->Arg(1) == rhs.Inst()->Arg(1)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}};
|
||||
return std::ranges::adjacent_find(range, std::not_fn(equal), resolver) == std::end(range);
|
||||
}
|
||||
|
||||
void FoldGetRegister(IR::Inst& inst) {
|
||||
if (inst.Arg(0).Reg() == IR::Reg::RZ) {
|
||||
inst.ReplaceUsesWith(IR::Value{u32{0}});
|
||||
|
@ -100,6 +121,157 @@ void FoldGetPred(IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Replaces the XMAD pattern generated by an integer FMA
|
||||
bool FoldXmadMultiplyAdd(IR::Block& block, IR::Inst& inst) {
|
||||
/*
|
||||
* We are looking for this specific pattern:
|
||||
* %6 = BitFieldUExtract %op_b, #0, #16
|
||||
* %7 = BitFieldUExtract %op_a', #16, #16
|
||||
* %8 = IMul32 %6, %7
|
||||
* %10 = BitFieldUExtract %op_a', #0, #16
|
||||
* %11 = BitFieldInsert %8, %10, #16, #16
|
||||
* %15 = BitFieldUExtract %op_b, #0, #16
|
||||
* %16 = BitFieldUExtract %op_a, #0, #16
|
||||
* %17 = IMul32 %15, %16
|
||||
* %18 = IAdd32 %17, %op_c
|
||||
* %22 = BitFieldUExtract %op_b, #16, #16
|
||||
* %23 = BitFieldUExtract %11, #16, #16
|
||||
* %24 = IMul32 %22, %23
|
||||
* %25 = ShiftLeftLogical32 %24, #16
|
||||
* %26 = ShiftLeftLogical32 %11, #16
|
||||
* %27 = IAdd32 %26, %18
|
||||
* %result = IAdd32 %25, %27
|
||||
*
|
||||
* And replace it with:
|
||||
* %temp = IMul32 %op_a, %op_b
|
||||
* %result = IAdd32 %temp, %op_c
|
||||
*
|
||||
* This optimization has been proven safe by Nvidia's compiler logic being reversed.
|
||||
* (If Nvidia generates this code from 'fma(a, b, c)', we can do the same in the reverse order.)
|
||||
*/
|
||||
const IR::Value zero{0u};
|
||||
const IR::Value sixteen{16u};
|
||||
IR::Inst* const _25{inst.Arg(0).TryInstRecursive()};
|
||||
IR::Inst* const _27{inst.Arg(1).TryInstRecursive()};
|
||||
if (!_25 || !_27) {
|
||||
return false;
|
||||
}
|
||||
if (_27->GetOpcode() != IR::Opcode::IAdd32) {
|
||||
return false;
|
||||
}
|
||||
if (_25->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _25->Arg(1) != sixteen) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const _24{_25->Arg(0).TryInstRecursive()};
|
||||
if (!_24 || _24->GetOpcode() != IR::Opcode::IMul32) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const _22{_24->Arg(0).TryInstRecursive()};
|
||||
IR::Inst* const _23{_24->Arg(1).TryInstRecursive()};
|
||||
if (!_22 || !_23) {
|
||||
return false;
|
||||
}
|
||||
if (_22->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||
return false;
|
||||
}
|
||||
if (_23->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||
return false;
|
||||
}
|
||||
if (_22->Arg(1) != sixteen || _22->Arg(2) != sixteen) {
|
||||
return false;
|
||||
}
|
||||
if (_23->Arg(1) != sixteen || _23->Arg(2) != sixteen) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const _11{_23->Arg(0).TryInstRecursive()};
|
||||
if (!_11 || _11->GetOpcode() != IR::Opcode::BitFieldInsert) {
|
||||
return false;
|
||||
}
|
||||
if (_11->Arg(2) != sixteen || _11->Arg(3) != sixteen) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const _8{_11->Arg(0).TryInstRecursive()};
|
||||
IR::Inst* const _10{_11->Arg(1).TryInstRecursive()};
|
||||
if (!_8 || !_10) {
|
||||
return false;
|
||||
}
|
||||
if (_8->GetOpcode() != IR::Opcode::IMul32) {
|
||||
return false;
|
||||
}
|
||||
if (_10->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const _6{_8->Arg(0).TryInstRecursive()};
|
||||
IR::Inst* const _7{_8->Arg(1).TryInstRecursive()};
|
||||
if (!_6 || !_7) {
|
||||
return false;
|
||||
}
|
||||
if (_6->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||
return false;
|
||||
}
|
||||
if (_7->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||
return false;
|
||||
}
|
||||
if (_6->Arg(1) != zero || _6->Arg(2) != sixteen) {
|
||||
return false;
|
||||
}
|
||||
if (_7->Arg(1) != sixteen || _7->Arg(2) != sixteen) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const _26{_27->Arg(0).TryInstRecursive()};
|
||||
IR::Inst* const _18{_27->Arg(1).TryInstRecursive()};
|
||||
if (!_26 || !_18) {
|
||||
return false;
|
||||
}
|
||||
if (_26->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _26->Arg(1) != sixteen) {
|
||||
return false;
|
||||
}
|
||||
if (_26->Arg(0).InstRecursive() != _11) {
|
||||
return false;
|
||||
}
|
||||
if (_18->GetOpcode() != IR::Opcode::IAdd32) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const _17{_18->Arg(0).TryInstRecursive()};
|
||||
if (!_17 || _17->GetOpcode() != IR::Opcode::IMul32) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const _15{_17->Arg(0).TryInstRecursive()};
|
||||
IR::Inst* const _16{_17->Arg(1).TryInstRecursive()};
|
||||
if (!_15 || !_16) {
|
||||
return false;
|
||||
}
|
||||
if (_15->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||
return false;
|
||||
}
|
||||
if (_16->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||
return false;
|
||||
}
|
||||
if (_15->Arg(1) != zero || _16->Arg(1) != zero || _10->Arg(1) != zero) {
|
||||
return false;
|
||||
}
|
||||
if (_15->Arg(2) != sixteen || _16->Arg(2) != sixteen || _10->Arg(2) != sixteen) {
|
||||
return false;
|
||||
}
|
||||
const std::array<IR::Value, 3> op_as{
|
||||
_7->Arg(0).Resolve(),
|
||||
_16->Arg(0).Resolve(),
|
||||
_10->Arg(0).Resolve(),
|
||||
};
|
||||
const std::array<IR::Value, 3> op_bs{
|
||||
_22->Arg(0).Resolve(),
|
||||
_6->Arg(0).Resolve(),
|
||||
_15->Arg(0).Resolve(),
|
||||
};
|
||||
const IR::U32 op_c{_18->Arg(1)};
|
||||
if (!AreEqual(op_as) || !AreEqual(op_bs)) {
|
||||
return false;
|
||||
}
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.ReplaceUsesWith(ir.IAdd(ir.IMul(IR::U32{op_as[0]}, IR::U32{op_bs[1]}), op_c));
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Replaces the pattern generated by two XMAD multiplications
|
||||
bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
||||
/*
|
||||
|
@ -116,33 +288,31 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
|||
*
|
||||
* This optimization has been proven safe by LLVM and MSVC.
|
||||
*/
|
||||
const IR::Value lhs_arg{inst.Arg(0)};
|
||||
const IR::Value rhs_arg{inst.Arg(1)};
|
||||
if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
|
||||
IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
|
||||
IR::Inst* const rhs_mul{inst.Arg(1).TryInstRecursive()};
|
||||
if (!lhs_shl || !rhs_mul) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
|
||||
if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
|
||||
lhs_shl->Arg(1) != IR::Value{16U}) {
|
||||
return false;
|
||||
}
|
||||
if (lhs_shl->Arg(0).IsImmediate()) {
|
||||
IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
|
||||
if (!lhs_mul) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
|
||||
IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
|
||||
if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
|
||||
return false;
|
||||
}
|
||||
if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
|
||||
return false;
|
||||
}
|
||||
const IR::U32 factor_b{lhs_mul->Arg(1)};
|
||||
if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
|
||||
if (factor_b.Resolve() != rhs_mul->Arg(1).Resolve()) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).TryInstRecursive()};
|
||||
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).TryInstRecursive()};
|
||||
if (!lhs_bfe || !rhs_bfe) {
|
||||
return false;
|
||||
}
|
||||
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
|
||||
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
|
||||
if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||
return false;
|
||||
}
|
||||
|
@ -155,10 +325,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
|||
if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
|
||||
return false;
|
||||
}
|
||||
if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
|
||||
const IR::U32 factor_a{lhs_bfe->Arg(0)};
|
||||
if (factor_a.Resolve() != rhs_bfe->Arg(0).Resolve()) {
|
||||
return false;
|
||||
}
|
||||
const IR::U32 factor_a{lhs_bfe->Arg(0)};
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
|
||||
return true;
|
||||
|
@ -181,6 +351,9 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
|
|||
if (FoldXmadMultiply(block, inst)) {
|
||||
return;
|
||||
}
|
||||
if (FoldXmadMultiplyAdd(block, inst)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ void RendererBase::UpdateCurrentFramebufferLayout() {
|
|||
render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
|
||||
}
|
||||
|
||||
void RendererBase::RequestScreenshot(void* data, std::function<void()> callback,
|
||||
void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callback,
|
||||
const Layout::FramebufferLayout& layout) {
|
||||
if (renderer_settings.screenshot_requested) {
|
||||
LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");
|
||||
|
|
|
@ -24,7 +24,7 @@ struct RendererSettings {
|
|||
// Screenshot
|
||||
std::atomic<bool> screenshot_requested{false};
|
||||
void* screenshot_bits{};
|
||||
std::function<void()> screenshot_complete_callback;
|
||||
std::function<void(bool)> screenshot_complete_callback;
|
||||
Layout::FramebufferLayout screenshot_framebuffer_layout;
|
||||
};
|
||||
|
||||
|
@ -80,7 +80,7 @@ public:
|
|||
void RefreshBaseSettings();
|
||||
|
||||
/// Request a screenshot of the next frame
|
||||
void RequestScreenshot(void* data, std::function<void()> callback,
|
||||
void RequestScreenshot(void* data, std::function<void(bool)> callback,
|
||||
const Layout::FramebufferLayout& layout);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -486,7 +486,7 @@ void RendererOpenGL::RenderScreenshot() {
|
|||
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
|
||||
|
||||
renderer_settings.screenshot_complete_callback();
|
||||
renderer_settings.screenshot_complete_callback(true);
|
||||
renderer_settings.screenshot_requested = false;
|
||||
}
|
||||
|
||||
|
|
|
@ -138,6 +138,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
|||
const bool use_accelerated =
|
||||
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
||||
RenderScreenshot(*framebuffer, use_accelerated);
|
||||
|
||||
bool has_been_recreated = false;
|
||||
const auto recreate_swapchain = [&] {
|
||||
|
@ -162,7 +163,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
|||
if (has_been_recreated) {
|
||||
blit_screen.Recreate();
|
||||
}
|
||||
const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
|
||||
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
|
||||
scheduler.Flush(render_semaphore);
|
||||
scheduler.WaitWorker();
|
||||
swapchain.Present(render_semaphore);
|
||||
|
@ -193,4 +194,153 @@ void RendererVulkan::Report() const {
|
|||
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
|
||||
}
|
||||
|
||||
/// Serves a pending screenshot request: draws the frame into an offscreen image, copies that
/// image into a host-visible buffer and hands the pixels to the requester's buffer.
/// Returns immediately when no screenshot has been requested.
/// @param framebuffer     Guest framebuffer configuration, forwarded to the blit screen
/// @param use_accelerated Forwarded unchanged to VKBlitScreen::Draw
void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer,
                                              bool use_accelerated) {
    if (!renderer_settings.screenshot_requested) {
        return;
    }
    const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
    // Offscreen render target; MUTABLE_FORMAT lets the view below pick the sRGB variant
    vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = nullptr,
        .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
        .imageType = VK_IMAGE_TYPE_2D,
        .format = VK_FORMAT_B8G8R8A8_UNORM,
        .extent =
            {
                .width = layout.width,
                .height = layout.height,
                .depth = 1,
            },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    });
    // Never read again; presumably it has to outlive the image's use to keep the memory bound
    const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);

    const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .image = *staging_image,
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
        .format = screen_info.is_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM,
        .components{
            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
        },
        .subresourceRange{
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = VK_REMAINING_ARRAY_LAYERS,
        },
    });
    const VkExtent2D render_area{.width = layout.width, .height = layout.height};
    const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
    // Since we're not rendering to the screen, ignore the render semaphore.
    void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));

    // Four bytes per B8G8R8A8 pixel. Widen before multiplying so the product cannot wrap in the
    // narrower type of the layout dimensions.
    const auto buffer_size = static_cast<VkDeviceSize>(layout.width) * layout.height * 4;
    const VkBufferCreateInfo dst_buffer_info{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = buffer_size,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    };
    const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
    MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);

    scheduler.RequestOutsideRenderPassOperationContext();
    // The lambda captures locals by reference; scheduler.Finish() below runs before they die
    scheduler.Record([&](vk::CommandBuffer cmdbuf) {
        // Make the rendered image readable by the transfer stage
        const VkImageMemoryBarrier read_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
            .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = *staging_image,
            .subresourceRange{
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .baseMipLevel = 0,
                .levelCount = VK_REMAINING_MIP_LEVELS,
                .baseArrayLayer = 0,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
        const VkImageMemoryBarrier image_write_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = 0,
            .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = *staging_image,
            .subresourceRange{
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .baseMipLevel = 0,
                .levelCount = VK_REMAINING_MIP_LEVELS,
                .baseArrayLayer = 0,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
        static constexpr VkMemoryBarrier memory_write_barrier{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
        };
        // Tightly packed copy of the whole image (row length / image height of 0)
        const VkBufferImageCopy copy{
            .bufferOffset = 0,
            .bufferRowLength = 0,
            .bufferImageHeight = 0,
            .imageSubresource{
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .mipLevel = 0,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
            .imageOffset{.x = 0, .y = 0, .z = 0},
            .imageExtent{
                .width = layout.width,
                .height = layout.height,
                .depth = 1,
            },
        };
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, read_barrier);
        cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
                                 copy);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, memory_write_barrier, nullptr, image_write_barrier);
    });
    // Ensure the copy is fully completed before saving the screenshot
    scheduler.Finish();

    // Copy backing image data to the QImage screenshot buffer
    const auto dst_memory_map = dst_buffer_memory.Map();
    // NOTE(review): the mapped span describes the memory commit, and an allocation may be larger
    // than the buffer it backs. Never copy more than the image itself occupies so the
    // destination buffer cannot be overrun - confirm the destination is sized from this layout.
    const std::size_t copy_size = dst_memory_map.size() < buffer_size
                                      ? dst_memory_map.size()
                                      : static_cast<std::size_t>(buffer_size);
    std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), copy_size);
    // The OpenGL renderer passes true here; the frontend names this argument invert_y
    renderer_settings.screenshot_complete_callback(false);
    renderer_settings.screenshot_requested = false;
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -54,6 +54,8 @@ public:
|
|||
private:
|
||||
void Report() const;
|
||||
|
||||
void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
Tegra::GPU& gpu;
|
||||
|
|
|
@ -130,7 +130,10 @@ void VKBlitScreen::Recreate() {
|
|||
CreateDynamicResources();
|
||||
}
|
||||
|
||||
VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
|
||||
VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
|
||||
const VkFramebuffer& host_framebuffer,
|
||||
const Layout::FramebufferLayout layout, VkExtent2D render_area,
|
||||
bool use_accelerated) {
|
||||
RefreshResources(framebuffer);
|
||||
|
||||
// Finish any pending renderpass
|
||||
|
@ -145,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
|||
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
|
||||
|
||||
BufferData data;
|
||||
SetUniformData(data, framebuffer);
|
||||
SetVertexData(data, framebuffer);
|
||||
SetUniformData(data, layout);
|
||||
SetVertexData(data, framebuffer, layout);
|
||||
|
||||
const std::span<u8> mapped_span = buffer_commit.Map();
|
||||
std::memcpy(mapped_span.data(), &data, sizeof(data));
|
||||
|
@ -220,52 +223,75 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
|||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
|
||||
});
|
||||
}
|
||||
scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
|
||||
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
|
||||
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
|
||||
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
|
||||
const VkClearValue clear_color{
|
||||
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
|
||||
};
|
||||
const VkRenderPassBeginInfo renderpass_bi{
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.renderPass = *renderpass,
|
||||
.framebuffer = *framebuffers[image_index],
|
||||
.renderArea =
|
||||
{
|
||||
.offset = {0, 0},
|
||||
.extent = size,
|
||||
},
|
||||
.clearValueCount = 1,
|
||||
.pClearValues = &clear_color,
|
||||
};
|
||||
const VkViewport viewport{
|
||||
.x = 0.0f,
|
||||
.y = 0.0f,
|
||||
.width = static_cast<float>(size.width),
|
||||
.height = static_cast<float>(size.height),
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 1.0f,
|
||||
};
|
||||
const VkRect2D scissor{
|
||||
.offset = {0, 0},
|
||||
.extent = size,
|
||||
};
|
||||
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
||||
cmdbuf.SetViewport(0, viewport);
|
||||
cmdbuf.SetScissor(0, scissor);
|
||||
scheduler.Record(
|
||||
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
|
||||
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
|
||||
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
|
||||
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
|
||||
const VkClearValue clear_color{
|
||||
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
|
||||
};
|
||||
const VkRenderPassBeginInfo renderpass_bi{
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.renderPass = *renderpass,
|
||||
.framebuffer = host_framebuffer,
|
||||
.renderArea =
|
||||
{
|
||||
.offset = {0, 0},
|
||||
.extent = size,
|
||||
},
|
||||
.clearValueCount = 1,
|
||||
.pClearValues = &clear_color,
|
||||
};
|
||||
const VkViewport viewport{
|
||||
.x = 0.0f,
|
||||
.y = 0.0f,
|
||||
.width = static_cast<float>(size.width),
|
||||
.height = static_cast<float>(size.height),
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 1.0f,
|
||||
};
|
||||
const VkRect2D scissor{
|
||||
.offset = {0, 0},
|
||||
.extent = size,
|
||||
};
|
||||
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
||||
cmdbuf.SetViewport(0, viewport);
|
||||
cmdbuf.SetScissor(0, scissor);
|
||||
|
||||
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
|
||||
descriptor_sets[image_index], {});
|
||||
cmdbuf.Draw(4, 1, 0, 0);
|
||||
cmdbuf.EndRenderPass();
|
||||
});
|
||||
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
|
||||
descriptor_sets[image_index], {});
|
||||
cmdbuf.Draw(4, 1, 0, 0);
|
||||
cmdbuf.EndRenderPass();
|
||||
});
|
||||
return *semaphores[image_index];
|
||||
}
|
||||
|
||||
/// Draws the frame into the framebuffer of the current swapchain image, using the swapchain
/// size as render area and the render window's framebuffer layout.
/// @return The semaphore returned by Draw
VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
                                          bool use_accelerated) {
    const std::size_t current_image{swapchain.GetImageIndex()};
    const VkExtent2D swapchain_extent{swapchain.GetSize()};
    const Layout::FramebufferLayout window_layout{render_window.GetFramebufferLayout()};
    return Draw(framebuffer, *framebuffers[current_image], window_layout, swapchain_extent,
                use_accelerated);
}
|
||||
|
||||
/// Creates a single-layer framebuffer for this blit screen's render pass with 'image_view' as
/// its only attachment.
/// @param image_view Attachment to render into
/// @param extent     Width and height of the framebuffer
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
    const VkFramebufferCreateInfo framebuffer_ci{
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .renderPass = *renderpass,
        .attachmentCount = 1,
        .pAttachments = &image_view,
        .width = extent.width,
        .height = extent.height,
        .layers = 1,
    };
    return device.GetLogical().CreateFramebuffer(framebuffer_ci);
}
|
||||
|
||||
void VKBlitScreen::CreateStaticResources() {
|
||||
CreateShaders();
|
||||
CreateSemaphores();
|
||||
|
@ -752,15 +778,13 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
|
|||
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
|
||||
}
|
||||
|
||||
void VKBlitScreen::SetUniformData(BufferData& data,
|
||||
const Tegra::FramebufferConfig& framebuffer) const {
|
||||
const auto& layout = render_window.GetFramebufferLayout();
|
||||
/// Stores in 'data' the orthographic matrix built from the width and height of 'layout'.
void VKBlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const {
    const auto target_width = static_cast<f32>(layout.width);
    const auto target_height = static_cast<f32>(layout.height);
    data.uniform.modelview_matrix = MakeOrthographicMatrix(target_width, target_height);
}
|
||||
|
||||
void VKBlitScreen::SetVertexData(BufferData& data,
|
||||
const Tegra::FramebufferConfig& framebuffer) const {
|
||||
void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||
const Layout::FramebufferLayout layout) const {
|
||||
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
|
||||
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
|
||||
|
||||
|
@ -798,7 +822,7 @@ void VKBlitScreen::SetVertexData(BufferData& data,
|
|||
static_cast<f32>(screen_info.height);
|
||||
}
|
||||
|
||||
const auto& screen = render_window.GetFramebufferLayout().screen;
|
||||
const auto& screen = layout.screen;
|
||||
const auto x = static_cast<f32>(screen.left);
|
||||
const auto y = static_cast<f32>(screen.top);
|
||||
const auto w = static_cast<f32>(screen.GetWidth());
|
||||
|
|
|
@ -56,8 +56,16 @@ public:
|
|||
void Recreate();
|
||||
|
||||
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
|
||||
const VkFramebuffer& host_framebuffer,
|
||||
const Layout::FramebufferLayout layout, VkExtent2D render_area,
|
||||
bool use_accelerated);
|
||||
|
||||
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
|
||||
bool use_accelerated);
|
||||
|
||||
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
||||
VkExtent2D extent);
|
||||
|
||||
private:
|
||||
struct BufferData;
|
||||
|
||||
|
@ -81,8 +89,9 @@ private:
|
|||
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
|
||||
|
||||
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
|
||||
void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
|
||||
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
|
||||
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||
const Layout::FramebufferLayout layout) const;
|
||||
|
||||
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
|
||||
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
|
||||
|
|
|
@ -634,9 +634,9 @@ void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_p
|
|||
screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32);
|
||||
renderer.RequestScreenshot(
|
||||
screenshot_image.bits(),
|
||||
[=, this] {
|
||||
[=, this](bool invert_y) {
|
||||
const std::string std_screenshot_path = screenshot_path.toStdString();
|
||||
if (screenshot_image.mirrored(false, true).save(screenshot_path)) {
|
||||
if (screenshot_image.mirrored(false, invert_y).save(screenshot_path)) {
|
||||
LOG_INFO(Frontend, "Screenshot saved to \"{}\"", std_screenshot_path);
|
||||
} else {
|
||||
LOG_ERROR(Frontend, "Failed to save screenshot to \"{}\"", std_screenshot_path);
|
||||
|
|
Loading…
Reference in a new issue