early-access version 4000
This commit is contained in:
parent
1996a32f94
commit
49f8c57d33
18 changed files with 680 additions and 72 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 3999.
|
This is the source code for early-access 4000.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
2
externals/CMakeLists.txt
vendored
2
externals/CMakeLists.txt
vendored
|
@ -193,7 +193,7 @@ if (ANDROID)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
if (UNIX AND NOT APPLE)
|
||||||
add_subdirectory(gamemode)
|
add_subdirectory(gamemode)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -182,7 +182,7 @@ if(ANDROID)
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
if (UNIX AND NOT APPLE)
|
||||||
target_sources(common PRIVATE
|
target_sources(common PRIVATE
|
||||||
linux/gamemode.cpp
|
linux/gamemode.cpp
|
||||||
linux/gamemode.h
|
linux/gamemode.h
|
||||||
|
|
|
@ -935,8 +935,8 @@ if (HAS_NCE)
|
||||||
arm/nce/arm_nce.h
|
arm/nce/arm_nce.h
|
||||||
arm/nce/arm_nce.s
|
arm/nce/arm_nce.s
|
||||||
arm/nce/guest_context.h
|
arm/nce/guest_context.h
|
||||||
arm/nce/patch.cpp
|
arm/nce/patcher.cpp
|
||||||
arm/nce/patch.h
|
arm/nce/patcher.h
|
||||||
arm/nce/instructions.h
|
arm/nce/instructions.h
|
||||||
)
|
)
|
||||||
target_link_libraries(core PRIVATE merry::oaknut)
|
target_link_libraries(core PRIVATE merry::oaknut)
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
#include "common/signal_chain.h"
|
#include "common/signal_chain.h"
|
||||||
#include "core/arm/nce/arm_nce.h"
|
#include "core/arm/nce/arm_nce.h"
|
||||||
#include "core/arm/nce/patch.h"
|
#include "core/arm/nce/patcher.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
|
||||||
|
|
474
src/core/arm/nce/patcher.cpp
Executable file
474
src/core/arm/nce/patcher.cpp
Executable file
|
@ -0,0 +1,474 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/arm64/native_clock.h"
|
||||||
|
#include "common/bit_cast.h"
|
||||||
|
#include "common/literals.h"
|
||||||
|
#include "core/arm/nce/arm_nce.h"
|
||||||
|
#include "core/arm/nce/guest_context.h"
|
||||||
|
#include "core/arm/nce/instructions.h"
|
||||||
|
#include "core/arm/nce/patcher.h"
|
||||||
|
#include "core/core.h"
|
||||||
|
#include "core/core_timing.h"
|
||||||
|
#include "core/hle/kernel/svc.h"
|
||||||
|
|
||||||
|
namespace Core::NCE {
|
||||||
|
|
||||||
|
using namespace Common::Literals;
|
||||||
|
using namespace oaknut::util;
|
||||||
|
|
||||||
|
using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
|
||||||
|
|
||||||
|
constexpr size_t MaxRelativeBranch = 128_MiB;
|
||||||
|
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
|
||||||
|
|
||||||
|
// Binds the code generator to m_patch_instructions, so everything emitted through `c`
// is appended to that vector. m_patch_instructions is declared before `c` in the class,
// hence it is already constructed when `c` is initialized here.
Patcher::Patcher()
    : c(m_patch_instructions) {
}

// No manual cleanup: every member releases its own storage.
Patcher::~Patcher() = default;
|
||||||
|
|
||||||
|
/// Scans the text segment of a module and generates patch code for every instruction
/// that needs emulation.
///
/// For each SVC, MRS (TPIDR_EL0 / TPIDRRO_EL0 / CNTPCT_EL0) and MSR TPIDR_EL0 found, a
/// module->patch branch relocation is recorded and a handler is emitted into the patch
/// buffer. Exclusive accesses are only recorded here; they are rewritten later in
/// RelocateAndCopy. The image itself is not modified by this function.
///
/// @param program_image Full program image containing the text segment.
/// @param code          Offset and size of the text segment inside program_image.
void Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
                        const Kernel::CodeSet::Segment& code) {
    // The shared helpers go first in the patch buffer; the SVC trampoline calls them via BL.
    c.l(m_save_context);
    WriteSaveContext();
    c.l(m_load_context);
    WriteLoadContext();

    // View the text segment as a sequence of 32-bit instruction words.
    const auto text_bytes = std::span{program_image}.subspan(code.offset, code.size);
    const std::span<const u32> words{reinterpret_cast<const u32*>(text_bytes.data()),
                                     text_bytes.size() / sizeof(u32)};
    const u32 word_count = static_cast<u32>(words.size());

    // Scanning starts at ModuleCodeIndex, i.e. the first 0x24 bytes are never patched
    // (presumably the module header -- confirm against the loader).
    for (u32 index = ModuleCodeIndex; index < word_count; ++index) {
        const u32 raw = words[index];

        // Byte offset of this instruction within text, and of the one following it, which
        // is where a handler resumes guest execution.
        const uintptr_t inst_offset = index * sizeof(u32);
        const uintptr_t resume_offset = inst_offset + sizeof(u32);

        // SVC
        if (auto svc = SVC{raw}; svc.Verify()) {
            this->BranchToPatch(inst_offset);
            WriteSvcTrampoline(resume_offset, svc.GetValue());
            continue;
        }

        // MRS Xn, TPIDR_EL0
        // MRS Xn, TPIDRRO_EL0
        if (auto mrs = MRS{raw};
            mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) {
            const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0
                                                                  : oaknut::SystemReg::TPIDR_EL0;
            const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
            this->BranchToPatch(inst_offset);
            WriteMrsHandler(resume_offset, dest_reg, src_reg);
            continue;
        }

        // MRS Xn, CNTPCT_EL0
        if (auto mrs = MRS{raw}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) {
            const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
            this->BranchToPatch(inst_offset);
            WriteCntpctHandler(resume_offset, dest_reg);
            continue;
        }

        // MRS Xn, CNTFRQ_EL0 has no handler; encountering it is treated as unreachable.
        if (auto mrs = MRS{raw}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) {
            UNREACHABLE();
        }

        // MSR TPIDR_EL0, Xn
        if (auto msr = MSR{raw}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) {
            const auto src_reg = oaknut::XReg{static_cast<int>(msr.GetRt())};
            this->BranchToPatch(inst_offset);
            WriteMsrHandler(resume_offset, src_reg);
            continue;
        }

        // Exclusive accesses are only remembered by word index for the relocation step.
        if (auto exclusive = Exclusive{raw}; exclusive.Verify()) {
            m_exclusives.push_back(index);
        }
    }

    // Pick where the patch section will live: images larger than the relative branch
    // limit get it in front of .text, everything else gets it appended after the image.
    if (program_image.size() > MaxRelativeBranch) {
        this->mode = PatchMode::PreText;
    } else {
        this->mode = PatchMode::PostData;
    }
}
|
||||||
|
|
||||||
|
/// Resolves every relocation recorded while patching and copies the patch section into
/// the program image.
///
/// @param load_base       Address the image is loaded at; used to rebase PC constants and
///                        trampoline addresses.
/// @param code            Offset and size of the text segment inside program_image.
/// @param program_image   Image to modify. In PreText mode the patch code is copied to the
///                        start of the image; otherwise the image is grown by the section
///                        size and the patch code is copied to its previous end.
/// @param out_trampolines Receives one (module address -> patch address) entry per
///                        recorded post-SVC trampoline.
void Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
                              const Kernel::CodeSet::Segment& code,
                              Kernel::PhysicalMemory& program_image,
                              EntryTrampolines* out_trampolines) {
    const size_t patch_size = GetSectionSize();
    const size_t image_size = program_image.size();
    const bool patch_precedes_text = mode == PatchMode::PreText;

    // Mutable word view over the text segment.
    const auto text_bytes = std::span{program_image}.subspan(code.offset, code.size);
    const std::span<u32> words{reinterpret_cast<u32*>(text_bytes.data()),
                               text_bytes.size() / sizeof(u32)};

    // Absolute addresses of patch offset 0 and module offset 0 for the chosen layout.
    const auto load_address = GetInteger(load_base);
    const auto patch_base = patch_precedes_text ? load_address : load_address + image_size;
    const auto module_base = patch_precedes_text ? load_address + patch_size : load_address;

    // Module -> patch: overwrite the patched instruction with a relative branch.
    for (const Relocation& rel : m_branch_to_patch_relocations) {
        oaknut::CodeGenerator branch{words.data() + rel.module_offset / sizeof(u32)};
        if (patch_precedes_text) {
            branch.B(rel.patch_offset - patch_size - rel.module_offset);
        } else {
            branch.B(image_size - rel.module_offset + rel.patch_offset);
        }
    }

    // Patch -> module: fill in the placeholder word reserved by BranchToModule.
    for (const Relocation& rel : m_branch_to_module_relocations) {
        oaknut::CodeGenerator branch{m_patch_instructions.data() +
                                     rel.patch_offset / sizeof(u32)};
        if (patch_precedes_text) {
            branch.B(patch_size - rel.patch_offset + rel.module_offset);
        } else {
            branch.B(rel.module_offset - image_size - rel.patch_offset);
        }
    }

    // Fill in the 64-bit PC constants reserved by WriteModulePc.
    for (const Relocation& rel : m_write_module_pc_relocations) {
        oaknut::CodeGenerator literal{m_patch_instructions.data() +
                                      rel.patch_offset / sizeof(u32)};
        literal.dx(module_base + rel.module_offset);
    }

    // Publish the post-SVC trampolines, keyed by the rebased module address.
    for (const Trampoline& entry : m_trampolines) {
        out_trampolines->insert(
            {module_base + entry.module_offset, patch_base + entry.patch_offset});
    }

    // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not.
    // Rewrite each recorded exclusive to its ordered form to keep that assumption valid.
    for (const ModuleTextAddress word_index : m_exclusives) {
        u32& word = words[word_index];
        word = Exclusive{word}.AsOrdered();
    }

    // Finally place the patch code. `words` must not be used past this point because the
    // resize below may reallocate the image.
    const size_t patch_bytes = m_patch_instructions.size() * sizeof(u32);
    if (patch_precedes_text) {
        std::memcpy(program_image.data(), m_patch_instructions.data(), patch_bytes);
        return;
    }
    program_image.resize(image_size + patch_size);
    std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), patch_bytes);
}
|
||||||
|
|
||||||
|
size_t Patcher::GetSectionSize() const noexcept {
|
||||||
|
return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Patcher::WriteLoadContext() {
|
||||||
|
// This function was called, which modifies X30, so use that as a scratch register.
|
||||||
|
// SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes
|
||||||
|
// of stack.
|
||||||
|
c.STR(X30, SP, 8);
|
||||||
|
c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
|
||||||
|
c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
|
||||||
|
|
||||||
|
// Load system registers.
|
||||||
|
c.LDR(W0, X30, offsetof(GuestContext, fpsr));
|
||||||
|
c.MSR(oaknut::SystemReg::FPSR, X0);
|
||||||
|
c.LDR(W0, X30, offsetof(GuestContext, fpcr));
|
||||||
|
c.MSR(oaknut::SystemReg::FPCR, X0);
|
||||||
|
c.LDR(W0, X30, offsetof(GuestContext, nzcv));
|
||||||
|
c.MSR(oaknut::SystemReg::NZCV, X0);
|
||||||
|
|
||||||
|
// Load all vector registers.
|
||||||
|
static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
|
||||||
|
for (int i = 0; i <= 30; i += 2) {
|
||||||
|
c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load all general-purpose registers except X30.
|
||||||
|
for (int i = 0; i <= 28; i += 2) {
|
||||||
|
c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reload our return X30 from the stack and return.
|
||||||
|
// The patch code will reload the guest X30 for us.
|
||||||
|
c.LDR(X30, SP, 8);
|
||||||
|
c.RET();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Patcher::WriteSaveContext() {
|
||||||
|
// This function was called, which modifies X30, so use that as a scratch register.
|
||||||
|
// SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of
|
||||||
|
// stack.
|
||||||
|
c.STR(X30, SP, 8);
|
||||||
|
c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
|
||||||
|
c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
|
||||||
|
|
||||||
|
// Store all general-purpose registers except X30.
|
||||||
|
for (int i = 0; i <= 28; i += 2) {
|
||||||
|
c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store all vector registers.
|
||||||
|
static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
|
||||||
|
for (int i = 0; i <= 30; i += 2) {
|
||||||
|
c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store guest system registers, X30 and SP, using X0 as a scratch register.
|
||||||
|
c.STR(X0, SP, PRE_INDEXED, -16);
|
||||||
|
c.LDR(X0, SP, 16);
|
||||||
|
c.STR(X0, X30, 8 * 30);
|
||||||
|
c.ADD(X0, SP, 32);
|
||||||
|
c.STR(X0, X30, offsetof(GuestContext, sp));
|
||||||
|
c.MRS(X0, oaknut::SystemReg::FPSR);
|
||||||
|
c.STR(W0, X30, offsetof(GuestContext, fpsr));
|
||||||
|
c.MRS(X0, oaknut::SystemReg::FPCR);
|
||||||
|
c.STR(W0, X30, offsetof(GuestContext, fpcr));
|
||||||
|
c.MRS(X0, oaknut::SystemReg::NZCV);
|
||||||
|
c.STR(W0, X30, offsetof(GuestContext, nzcv));
|
||||||
|
c.LDR(X0, SP, POST_INDEXED, 16);
|
||||||
|
|
||||||
|
// Reload our return X30 from the stack, and return.
|
||||||
|
c.LDR(X30, SP, 8);
|
||||||
|
c.RET();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emits the handler that replaces a guest SVC instruction.
///
/// The emitted code has two halves:
///  1. Guest -> host: lock the context, save all guest state, record the resume PC and
///     the SVC number, atomically take and clear esr_el1, then restore the host's SP,
///     TPIDR_EL0 and callee-saved registers and RET to the host with
///     (old esr_el1 | HaltReason::SupervisorCall) in X0.
///  2. Host -> guest ("post-SVC trampoline", recorded in m_trampolines): remember the host
///     return address, reload all guest state, unlock the context and branch back to the
///     instruction following the SVC.
///
/// @param module_dest Text-relative byte offset of the instruction after the SVC.
/// @param svc_id      Immediate of the SVC instruction being replaced.
void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
    // Both context helpers expect 16 bytes of stack with X30 stored at [SP]; wrap the
    // push / BL / pop sequence once.
    const auto call_context_helper = [this](oaknut::Label& helper) {
        c.STR(X30, SP, PRE_INDEXED, -16);
        c.BL(helper);
        c.LDR(X30, SP, POST_INDEXED, 16);
    };

    // ---- Guest -> host -------------------------------------------------------------

    // State is about to be written to the shared context, so take its lock first.
    this->LockContext();

    // Saves everything except PC, which is known statically (see the literal below).
    call_context_helper(m_save_context);

    // All guest registers are saved, so any register may be used as scratch from here on.
    // X1 = GuestContext*; store the resume PC from the literal emitted at the very end.
    oaknut::Label resume_pc_literal;
    c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    c.LDR(X2, resume_pc_literal);
    c.STR(X2, X1, offsetof(GuestContext, pc));

    // Record which SVC the host should service.
    c.MOV(X2, svc_id);
    c.STR(W2, X1, offsetof(GuestContext, svc_swi));

    // Atomically exchange esr_el1 with zero; the previous value ends up in X0.
    static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
    oaknut::Label exchange_retry;
    c.ADD(X2, X1, offsetof(GuestContext, esr_el1));
    c.l(exchange_retry);
    c.LDAXR(X0, X2);
    c.STLXR(W3, XZR, X2);
    c.CBNZ(W3, exchange_retry);

    // X0 is the value returned to the host: flag that an SVC is pending.
    c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall));

    // Rebase X1 onto the embedded HostContext. LDP/STP immediates only reach [-512, 504],
    // so every offset below is relative to the start of HostContext.
    c.ADD(X1, X1, offsetof(GuestContext, host_ctx));

    // Host SP and TPIDR_EL0 are adjacent, so one LDP fetches both.
    static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
    c.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
    c.MOV(SP, X2);
    c.MSR(oaknut::SystemReg::TPIDR_EL0, X3);

    // Callee-saved host GPRs: X19..X30, in pairs.
    constexpr size_t host_gpr_base = offsetof(HostContext, host_saved_regs);
    for (int slot = 0; slot < 12; slot += 2) {
        c.LDP(oaknut::XReg{19 + slot}, oaknut::XReg{20 + slot}, X1,
              host_gpr_base + slot * sizeof(u64));
    }

    // Callee-saved host vector registers: Q8..Q15, in pairs.
    constexpr size_t host_vec_base = offsetof(HostContext, host_saved_vregs);
    for (int slot = 0; slot < 8; slot += 2) {
        c.LDP(oaknut::QReg{8 + slot}, oaknut::QReg{9 + slot}, X1,
              host_vec_base + slot * sizeof(u128));
    }

    // Back to the host.
    c.RET();

    // ---- Host -> guest -------------------------------------------------------------

    // The host re-enters here after servicing the SVC; remember where this code lives.
    m_trampolines.push_back({c.offset(), module_dest});

    // Store the host's return address in the X30 slot of its saved registers so the host
    // stack can be unwound properly on the next exit.
    c.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
    c.ADD(X0, X2, offsetof(GuestContext, host_ctx));
    c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));

    // Restores everything except X30 and PC.
    call_context_helper(m_load_context);

    // Guest X30 is reloaded by hand, borrowing X1 as the context pointer.
    c.STR(X1, SP, PRE_INDEXED, -16);
    c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
    c.LDR(X1, SP, POST_INDEXED, 16);

    // Guest state is live again; release the context.
    this->UnlockContext();

    // Resume at the instruction following the SVC.
    this->BranchToModule(module_dest);

    // Literal holding the resume PC; its value is filled in during relocation.
    c.l(resume_pc_literal);
    this->WriteModulePc(module_dest);
}
|
||||||
|
|
||||||
|
/// Emits the handler that replaces `MRS Xn, TPIDR_EL0` / `MRS Xn, TPIDRRO_EL0`.
///
/// The emitted code reads the host TPIDR_EL0, which the loads below treat as a pointer to
/// NativeExecutionParameters, and loads the emulated register value from it into dest_reg.
///
/// @param module_dest Text-relative byte offset of the instruction after the MRS.
/// @param dest_reg    Destination register of the original MRS.
/// @param src_reg     System register being read; TPIDRRO_EL0 selects the read-only TLS
///                    slot, anything else selects tpidr_el0.
void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
                              oaknut::SystemReg src_reg) {
    // Pick which emulated register slot to read.
    const size_t slot_offset = src_reg == oaknut::SystemReg::TPIDRRO_EL0
                                   ? offsetof(NativeExecutionParameters, tpidrro_el0)
                                   : offsetof(NativeExecutionParameters, tpidr_el0);

    // dest_reg serves as both the base pointer and the result, so no scratch is needed.
    c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(dest_reg, dest_reg, slot_offset);

    // Resume at the instruction following the MRS.
    this->BranchToModule(module_dest);
}
|
||||||
|
|
||||||
|
/// Emits the handler that replaces `MSR TPIDR_EL0, Xn`.
///
/// The emitted code stores the guest value into NativeExecutionParameters::tpidr_el0
/// instead of touching the real system register.
///
/// @param module_dest Text-relative byte offset of the instruction after the MSR.
/// @param src_reg     Source register of the original MSR.
void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) {
    // A temporary distinct from the source register is required: X0, unless the source
    // itself is X0, in which case X1.
    const bool source_is_x0 = src_reg.index() == 0;
    const oaknut::XReg temp = source_is_x0 ? X1 : X0;

    // Preserve the temporary's guest value on the stack (16 bytes keep SP aligned).
    c.STR(temp, SP, PRE_INDEXED, -16);

    // temp = host TPIDR_EL0; write the guest value into its tpidr_el0 slot.
    c.MRS(temp, oaknut::SystemReg::TPIDR_EL0);
    c.STR(src_reg, temp, offsetof(NativeExecutionParameters, tpidr_el0));

    // Put the temporary back.
    c.LDR(temp, SP, POST_INDEXED, 16);

    // Resume at the instruction following the MSR.
    this->BranchToModule(module_dest);
}
|
||||||
|
|
||||||
|
/// Emits the handler that replaces `MRS Xn, CNTPCT_EL0`.
///
/// The emitted code reads CNTVCT_EL0 and scales it by the 128-bit factor returned by
/// NativeClock::GetGuestCNTFRQFactor(), which is embedded as two 64-bit literals after
/// the handler:
///     result = UMULH(counter, factor_lo) + counter * factor_hi
///
/// @param module_dest Text-relative byte offset of the instruction after the MRS.
/// @param dest_reg    Destination register of the original MRS.
void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) {
    // One clock instance is shared by every handler emitted in this process.
    static Common::Arm64::NativeClock clock{};
    const auto factor_words = Common::BitCast<std::array<u64, 2>>(clock.GetGuestCNTFRQFactor());

    // Two temporaries that do not alias dest_reg: X0/X1 normally, X2/X3 when the
    // destination is itself X0 or X1.
    const bool dest_is_x0_or_x1 = dest_reg.index() == 0 || dest_reg.index() == 1;
    const oaknut::XReg temp_lo = dest_is_x0_or_x1 ? X2 : X0;
    const oaknut::XReg temp_hi = dest_is_x0_or_x1 ? X3 : X1;

    oaknut::Label factor_lo_literal;
    oaknut::Label factor_hi_literal;

    // Preserve the temporaries.
    c.STP(temp_lo, temp_hi, SP, PRE_INDEXED, -16);

    // Raw host counter.
    c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0);

    // Both halves of the scaling factor, loaded PC-relative.
    c.LDR(temp_lo, factor_lo_literal);
    c.LDR(temp_hi, factor_hi_literal);

    // temp_lo = upper 64 bits of counter * factor_lo.
    c.UMULH(temp_lo, dest_reg, temp_lo);

    // dest = counter * factor_hi + temp_lo.
    c.MADD(dest_reg, dest_reg, temp_hi, temp_lo);

    // Restore the temporaries.
    c.LDP(temp_lo, temp_hi, SP, POST_INDEXED, 16);

    // Resume at the instruction following the MRS.
    this->BranchToModule(module_dest);

    // Literal pool: low word first, then high word.
    c.l(factor_lo_literal);
    c.dx(factor_words[0]);
    c.l(factor_hi_literal);
    c.dx(factor_words[1]);
}
|
||||||
|
|
||||||
|
void Patcher::LockContext() {
|
||||||
|
oaknut::Label retry;
|
||||||
|
|
||||||
|
// Save scratches.
|
||||||
|
c.STP(X0, X1, SP, PRE_INDEXED, -16);
|
||||||
|
|
||||||
|
// Reload lock pointer.
|
||||||
|
c.l(retry);
|
||||||
|
c.CLREX();
|
||||||
|
c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
|
||||||
|
c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
|
||||||
|
|
||||||
|
static_assert(SpinLockLocked == 0);
|
||||||
|
|
||||||
|
// Load-linked with acquire ordering.
|
||||||
|
c.LDAXR(W1, X0);
|
||||||
|
|
||||||
|
// If the value was SpinLockLocked, clear monitor and retry.
|
||||||
|
c.CBZ(W1, retry);
|
||||||
|
|
||||||
|
// Store-conditional SpinLockLocked with relaxed ordering.
|
||||||
|
c.STXR(W1, WZR, X0);
|
||||||
|
|
||||||
|
// If we failed to store, retry.
|
||||||
|
c.CBNZ(W1, retry);
|
||||||
|
|
||||||
|
// We succeeded! Reload scratches.
|
||||||
|
c.LDP(X0, X1, SP, POST_INDEXED, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Patcher::UnlockContext() {
|
||||||
|
// Save scratches.
|
||||||
|
c.STP(X0, X1, SP, PRE_INDEXED, -16);
|
||||||
|
|
||||||
|
// Load lock pointer.
|
||||||
|
c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
|
||||||
|
c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
|
||||||
|
|
||||||
|
// Load SpinLockUnlocked.
|
||||||
|
c.MOV(W1, SpinLockUnlocked);
|
||||||
|
|
||||||
|
// Store value with release ordering.
|
||||||
|
c.STLR(W1, X0);
|
||||||
|
|
||||||
|
// Load scratches.
|
||||||
|
c.LDP(X0, X1, SP, POST_INDEXED, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Core::NCE
|
98
src/core/arm/nce/patcher.h
Executable file
98
src/core/arm/nce/patcher.h
Executable file
|
@ -0,0 +1,98 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <span>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
#include <oaknut/code_block.hpp>
|
||||||
|
#include <oaknut/oaknut.hpp>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "core/hle/kernel/code_set.h"
|
||||||
|
#include "core/hle/kernel/k_typed_address.h"
|
||||||
|
#include "core/hle/kernel/physical_memory.h"
|
||||||
|
|
||||||
|
namespace Core::NCE {
|
||||||
|
|
||||||
|
/// Where the generated patch section is placed relative to the module image.
enum class PatchMode : u32 {
    None = 0,     ///< No placement selected; this is the value a new Patcher starts with.
    PreText = 1,  ///< Patch section is inserted before .text
    PostData = 2, ///< Patch section is inserted after .data
};
|
||||||
|
|
||||||
|
using ModuleTextAddress = u64;
|
||||||
|
using PatchTextAddress = u64;
|
||||||
|
using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>;
|
||||||
|
|
||||||
|
class Patcher {
|
||||||
|
public:
|
||||||
|
explicit Patcher();
|
||||||
|
~Patcher();
|
||||||
|
|
||||||
|
void PatchText(const Kernel::PhysicalMemory& program_image,
|
||||||
|
const Kernel::CodeSet::Segment& code);
|
||||||
|
void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
|
||||||
|
Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines);
|
||||||
|
size_t GetSectionSize() const noexcept;
|
||||||
|
|
||||||
|
[[nodiscard]] PatchMode GetPatchMode() const noexcept {
|
||||||
|
return mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
using ModuleDestLabel = uintptr_t;
|
||||||
|
|
||||||
|
struct Trampoline {
|
||||||
|
ptrdiff_t patch_offset;
|
||||||
|
uintptr_t module_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
void WriteLoadContext();
|
||||||
|
void WriteSaveContext();
|
||||||
|
void LockContext();
|
||||||
|
void UnlockContext();
|
||||||
|
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id);
|
||||||
|
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
|
||||||
|
oaknut::SystemReg src_reg);
|
||||||
|
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg);
|
||||||
|
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void BranchToPatch(uintptr_t module_dest) {
|
||||||
|
m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
|
||||||
|
}
|
||||||
|
|
||||||
|
void BranchToModule(uintptr_t module_dest) {
|
||||||
|
m_branch_to_module_relocations.push_back({c.offset(), module_dest});
|
||||||
|
c.dw(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void WriteModulePc(uintptr_t module_dest) {
|
||||||
|
m_write_module_pc_relocations.push_back({c.offset(), module_dest});
|
||||||
|
c.dx(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// List of patch instructions we have generated.
|
||||||
|
std::vector<u32> m_patch_instructions{};
|
||||||
|
|
||||||
|
// Relocation type for relative branch from module to patch.
|
||||||
|
struct Relocation {
|
||||||
|
ptrdiff_t patch_offset; ///< Offset in bytes from the start of the patch section.
|
||||||
|
uintptr_t module_offset; ///< Offset in bytes from the start of the text section.
|
||||||
|
};
|
||||||
|
|
||||||
|
oaknut::VectorCodeGenerator c;
|
||||||
|
std::vector<Trampoline> m_trampolines;
|
||||||
|
std::vector<Relocation> m_branch_to_patch_relocations{};
|
||||||
|
std::vector<Relocation> m_branch_to_module_relocations{};
|
||||||
|
std::vector<Relocation> m_write_module_pc_relocations{};
|
||||||
|
std::vector<ModuleTextAddress> m_exclusives{};
|
||||||
|
oaknut::Label m_save_context{};
|
||||||
|
oaknut::Label m_load_context{};
|
||||||
|
PatchMode mode{PatchMode::None};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Core::NCE
|
|
@ -16,7 +16,7 @@
|
||||||
#include "core/loader/nso.h"
|
#include "core/loader/nso.h"
|
||||||
|
|
||||||
#ifdef HAS_NCE
|
#ifdef HAS_NCE
|
||||||
#include "core/arm/nce/patch.h"
|
#include "core/arm/nce/patcher.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Loader {
|
namespace Loader {
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
|
||||||
#ifdef HAS_NCE
|
#ifdef HAS_NCE
|
||||||
#include "core/arm/nce/patch.h"
|
#include "core/arm/nce/patcher.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Loader {
|
namespace Loader {
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
|
||||||
#ifdef HAS_NCE
|
#ifdef HAS_NCE
|
||||||
#include "core/arm/nce/patch.h"
|
#include "core/arm/nce/patcher.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Loader {
|
namespace Loader {
|
||||||
|
|
|
@ -146,8 +146,12 @@ StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||||
return staging_buffer_pool.RequestUploadBuffer(size);
|
return staging_buffer_pool.RequestUploadBuffer(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
|
StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
|
||||||
return staging_buffer_pool.RequestDownloadBuffer(size);
|
return staging_buffer_pool.RequestDownloadBuffer(size, deferred);
|
||||||
|
}
|
||||||
|
|
||||||
|
void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
|
||||||
|
staging_buffer_pool.FreeDeferredStagingBuffer(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
|
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
|
||||||
|
|
|
@ -66,7 +66,9 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
|
[[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
|
||||||
|
|
||||||
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
|
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false);
|
||||||
|
|
||||||
|
void FreeDeferredStagingBuffer(StagingBufferMap& buffer);
|
||||||
|
|
||||||
bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
|
bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -246,7 +248,7 @@ struct BufferCacheParams {
|
||||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
|
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
|
||||||
static constexpr bool USE_MEMORY_MAPS = true;
|
static constexpr bool USE_MEMORY_MAPS = true;
|
||||||
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
|
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
|
||||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
|
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
|
||||||
|
|
||||||
// TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
|
// TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
|
||||||
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
|
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
|
||||||
|
|
|
@ -28,63 +28,69 @@ StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
|
||||||
|
|
||||||
StagingBuffers::~StagingBuffers() = default;
|
StagingBuffers::~StagingBuffers() = default;
|
||||||
|
|
||||||
StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
|
StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence,
|
||||||
|
bool deferred) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_BufferRequest);
|
MICROPROFILE_SCOPE(OpenGL_BufferRequest);
|
||||||
|
|
||||||
const size_t index = RequestBuffer(requested_size);
|
const size_t index = RequestBuffer(requested_size);
|
||||||
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
|
OGLSync* const sync = insert_fence ? &allocs[index].sync : nullptr;
|
||||||
sync_indices[index] = insert_fence ? ++current_sync_index : 0;
|
allocs[index].sync_index = insert_fence ? ++current_sync_index : 0;
|
||||||
|
allocs[index].deferred = deferred;
|
||||||
return StagingBufferMap{
|
return StagingBufferMap{
|
||||||
.mapped_span = std::span(maps[index], requested_size),
|
.mapped_span = std::span(allocs[index].map, requested_size),
|
||||||
.sync = sync,
|
.sync = sync,
|
||||||
.buffer = buffers[index].handle,
|
.buffer = allocs[index].buffer.handle,
|
||||||
|
.index = index,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void StagingBuffers::FreeDeferredStagingBuffer(size_t index) {
|
||||||
|
ASSERT(allocs[index].deferred);
|
||||||
|
allocs[index].deferred = false;
|
||||||
|
}
|
||||||
|
|
||||||
size_t StagingBuffers::RequestBuffer(size_t requested_size) {
|
size_t StagingBuffers::RequestBuffer(size_t requested_size) {
|
||||||
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
|
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
|
||||||
return *index;
|
return *index;
|
||||||
}
|
}
|
||||||
|
StagingBufferAlloc alloc;
|
||||||
OGLBuffer& buffer = buffers.emplace_back();
|
alloc.buffer.Create();
|
||||||
buffer.Create();
|
|
||||||
const auto next_pow2_size = Common::NextPow2(requested_size);
|
const auto next_pow2_size = Common::NextPow2(requested_size);
|
||||||
glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr,
|
glNamedBufferStorage(alloc.buffer.handle, next_pow2_size, nullptr,
|
||||||
storage_flags | GL_MAP_PERSISTENT_BIT);
|
storage_flags | GL_MAP_PERSISTENT_BIT);
|
||||||
maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size,
|
alloc.map = static_cast<u8*>(glMapNamedBufferRange(alloc.buffer.handle, 0, next_pow2_size,
|
||||||
map_flags | GL_MAP_PERSISTENT_BIT)));
|
map_flags | GL_MAP_PERSISTENT_BIT));
|
||||||
syncs.emplace_back();
|
alloc.size = next_pow2_size;
|
||||||
sync_indices.emplace_back();
|
allocs.emplace_back(std::move(alloc));
|
||||||
sizes.push_back(next_pow2_size);
|
return allocs.size() - 1;
|
||||||
|
|
||||||
ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
|
|
||||||
maps.size() == sizes.size());
|
|
||||||
|
|
||||||
return buffers.size() - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
|
std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
|
||||||
size_t known_unsignaled_index = current_sync_index + 1;
|
size_t known_unsignaled_index = current_sync_index + 1;
|
||||||
size_t smallest_buffer = std::numeric_limits<size_t>::max();
|
size_t smallest_buffer = std::numeric_limits<size_t>::max();
|
||||||
std::optional<size_t> found;
|
std::optional<size_t> found;
|
||||||
const size_t num_buffers = sizes.size();
|
const size_t num_buffers = allocs.size();
|
||||||
for (size_t index = 0; index < num_buffers; ++index) {
|
for (size_t index = 0; index < num_buffers; ++index) {
|
||||||
const size_t buffer_size = sizes[index];
|
StagingBufferAlloc& alloc = allocs[index];
|
||||||
|
const size_t buffer_size = alloc.size;
|
||||||
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
|
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (syncs[index].handle != 0) {
|
if (alloc.deferred) {
|
||||||
if (sync_indices[index] >= known_unsignaled_index) {
|
continue;
|
||||||
|
}
|
||||||
|
if (alloc.sync.handle != 0) {
|
||||||
|
if (alloc.sync_index >= known_unsignaled_index) {
|
||||||
// This fence is later than a fence that is known to not be signaled
|
// This fence is later than a fence that is known to not be signaled
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!syncs[index].IsSignaled()) {
|
if (!alloc.sync.IsSignaled()) {
|
||||||
// Since this fence hasn't been signaled, it's safe to assume all later
|
// Since this fence hasn't been signaled, it's safe to assume all later
|
||||||
// fences haven't been signaled either
|
// fences haven't been signaled either
|
||||||
known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]);
|
known_unsignaled_index = std::min(known_unsignaled_index, alloc.sync_index);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
syncs[index].Release();
|
alloc.sync.Release();
|
||||||
}
|
}
|
||||||
smallest_buffer = buffer_size;
|
smallest_buffer = buffer_size;
|
||||||
found = index;
|
found = index;
|
||||||
|
@ -143,8 +149,12 @@ StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
|
||||||
return upload_buffers.RequestMap(size, true);
|
return upload_buffers.RequestMap(size, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
|
StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size, bool deferred) {
|
||||||
return download_buffers.RequestMap(size, false);
|
return download_buffers.RequestMap(size, false, deferred);
|
||||||
|
}
|
||||||
|
|
||||||
|
void StagingBufferPool::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
|
||||||
|
download_buffers.FreeDeferredStagingBuffer(buffer.index);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -26,23 +26,30 @@ struct StagingBufferMap {
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
OGLSync* sync;
|
OGLSync* sync;
|
||||||
GLuint buffer;
|
GLuint buffer;
|
||||||
|
size_t index;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct StagingBuffers {
|
struct StagingBuffers {
|
||||||
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
||||||
~StagingBuffers();
|
~StagingBuffers();
|
||||||
|
|
||||||
StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
|
StagingBufferMap RequestMap(size_t requested_size, bool insert_fence, bool deferred = false);
|
||||||
|
|
||||||
|
void FreeDeferredStagingBuffer(size_t index);
|
||||||
|
|
||||||
size_t RequestBuffer(size_t requested_size);
|
size_t RequestBuffer(size_t requested_size);
|
||||||
|
|
||||||
std::optional<size_t> FindBuffer(size_t requested_size);
|
std::optional<size_t> FindBuffer(size_t requested_size);
|
||||||
|
|
||||||
std::vector<OGLSync> syncs;
|
struct StagingBufferAlloc {
|
||||||
std::vector<OGLBuffer> buffers;
|
OGLSync sync;
|
||||||
std::vector<u8*> maps;
|
OGLBuffer buffer;
|
||||||
std::vector<size_t> sizes;
|
u8* map;
|
||||||
std::vector<size_t> sync_indices;
|
size_t size;
|
||||||
|
size_t sync_index;
|
||||||
|
bool deferred;
|
||||||
|
};
|
||||||
|
std::vector<StagingBufferAlloc> allocs;
|
||||||
GLenum storage_flags;
|
GLenum storage_flags;
|
||||||
GLenum map_flags;
|
GLenum map_flags;
|
||||||
size_t current_sync_index = 0;
|
size_t current_sync_index = 0;
|
||||||
|
@ -85,7 +92,8 @@ public:
|
||||||
~StagingBufferPool() = default;
|
~StagingBufferPool() = default;
|
||||||
|
|
||||||
StagingBufferMap RequestUploadBuffer(size_t size);
|
StagingBufferMap RequestUploadBuffer(size_t size);
|
||||||
StagingBufferMap RequestDownloadBuffer(size_t size);
|
StagingBufferMap RequestDownloadBuffer(size_t size, bool deferred = false);
|
||||||
|
void FreeDeferredStagingBuffer(StagingBufferMap& buffer);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
|
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
|
||||||
|
|
|
@ -557,8 +557,12 @@ StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||||
return staging_buffer_pool.RequestUploadBuffer(size);
|
return staging_buffer_pool.RequestUploadBuffer(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
|
||||||
return staging_buffer_pool.RequestDownloadBuffer(size);
|
return staging_buffer_pool.RequestDownloadBuffer(size, deferred);
|
||||||
|
}
|
||||||
|
|
||||||
|
void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferMap& buffer) {
|
||||||
|
staging_buffer_pool.FreeDeferredStagingBuffer(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
|
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
|
||||||
|
|
|
@ -74,7 +74,9 @@ public:
|
||||||
|
|
||||||
StagingBufferMap UploadStagingBuffer(size_t size);
|
StagingBufferMap UploadStagingBuffer(size_t size);
|
||||||
|
|
||||||
StagingBufferMap DownloadStagingBuffer(size_t size);
|
StagingBufferMap DownloadStagingBuffer(size_t size, bool deferred = false);
|
||||||
|
|
||||||
|
void FreeDeferredStagingBuffer(StagingBufferMap& buffer);
|
||||||
|
|
||||||
u64 GetDeviceLocalMemory() const {
|
u64 GetDeviceLocalMemory() const {
|
||||||
return device_access_memory;
|
return device_access_memory;
|
||||||
|
@ -359,7 +361,7 @@ struct TextureCacheParams {
|
||||||
static constexpr bool FRAMEBUFFER_BLITS = true;
|
static constexpr bool FRAMEBUFFER_BLITS = true;
|
||||||
static constexpr bool HAS_EMULATED_COPIES = true;
|
static constexpr bool HAS_EMULATED_COPIES = true;
|
||||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
|
||||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
|
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
|
||||||
|
|
||||||
using Runtime = OpenGL::TextureCacheRuntime;
|
using Runtime = OpenGL::TextureCacheRuntime;
|
||||||
using Image = OpenGL::Image;
|
using Image = OpenGL::Image;
|
||||||
|
@ -367,7 +369,7 @@ struct TextureCacheParams {
|
||||||
using ImageView = OpenGL::ImageView;
|
using ImageView = OpenGL::ImageView;
|
||||||
using Sampler = OpenGL::Sampler;
|
using Sampler = OpenGL::Sampler;
|
||||||
using Framebuffer = OpenGL::Framebuffer;
|
using Framebuffer = OpenGL::Framebuffer;
|
||||||
using AsyncBuffer = u32;
|
using AsyncBuffer = OpenGL::StagingBufferMap;
|
||||||
using BufferType = GLuint;
|
using BufferType = GLuint;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -892,10 +892,6 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
||||||
UpdateFrontFace(regs);
|
UpdateFrontFace(regs);
|
||||||
UpdateStencilOp(regs);
|
UpdateStencilOp(regs);
|
||||||
|
|
||||||
if (device.IsExtVertexInputDynamicStateSupported()) {
|
|
||||||
UpdateVertexInput(regs);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (state_tracker.TouchStateEnable()) {
|
if (state_tracker.TouchStateEnable()) {
|
||||||
UpdateDepthBoundsTestEnable(regs);
|
UpdateDepthBoundsTestEnable(regs);
|
||||||
UpdateDepthTestEnable(regs);
|
UpdateDepthTestEnable(regs);
|
||||||
|
@ -918,6 +914,9 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
||||||
UpdateBlending(regs);
|
UpdateBlending(regs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (device.IsExtVertexInputDynamicStateSupported()) {
|
||||||
|
UpdateVertexInput(regs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::HandleTransformFeedback() {
|
void RasterizerVulkan::HandleTransformFeedback() {
|
||||||
|
|
|
@ -519,10 +519,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state");
|
LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state");
|
||||||
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
|
RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
|
||||||
VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||||
|
|
||||||
LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state2");
|
|
||||||
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
|
|
||||||
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_nvidia) {
|
if (is_nvidia) {
|
||||||
|
@ -611,18 +607,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (extensions.vertex_input_dynamic_state && is_qualcomm) {
|
if (extensions.vertex_input_dynamic_state && is_qualcomm) {
|
||||||
const u32 version = (properties.properties.driverVersion << 3) >> 3;
|
// Qualcomm drivers do not properly support vertex_input_dynamic_state.
|
||||||
if (version >= VK_MAKE_API_VERSION(0, 0, 676, 0) &&
|
LOG_WARNING(Render_Vulkan,
|
||||||
version < VK_MAKE_API_VERSION(0, 0, 680, 0)) {
|
"Qualcomm drivers have broken VK_EXT_vertex_input_dynamic_state");
|
||||||
// Qualcomm Adreno 7xx drivers do not properly support vertex_input_dynamic_state.
|
|
||||||
LOG_WARNING(
|
|
||||||
Render_Vulkan,
|
|
||||||
"Qualcomm Adreno 7xx drivers have broken VK_EXT_vertex_input_dynamic_state");
|
|
||||||
RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
|
RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
|
||||||
features.vertex_input_dynamic_state,
|
features.vertex_input_dynamic_state,
|
||||||
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
sets_per_pool = 64;
|
sets_per_pool = 64;
|
||||||
if (is_amd_driver) {
|
if (is_amd_driver) {
|
||||||
|
@ -704,6 +695,22 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
std::min(properties.properties.limits.maxVertexInputBindings, 16U);
|
std::min(properties.properties.limits.maxVertexInputBindings, 16U);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) {
|
||||||
|
LOG_INFO(Render_Vulkan,
|
||||||
|
"Removing extendedDynamicState2 due to missing extendedDynamicState");
|
||||||
|
RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
|
||||||
|
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!extensions.extended_dynamic_state2 && extensions.extended_dynamic_state3) {
|
||||||
|
LOG_INFO(Render_Vulkan,
|
||||||
|
"Removing extendedDynamicState3 due to missing extendedDynamicState2");
|
||||||
|
RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3,
|
||||||
|
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||||
|
dynamic_state3_blending = false;
|
||||||
|
dynamic_state3_enables = false;
|
||||||
|
}
|
||||||
|
|
||||||
logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions),
|
logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions),
|
||||||
first_next, dld);
|
first_next, dld);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue