early-access version 2644

This commit is contained in:
pineappleEA 2022-04-04 23:44:17 +02:00
parent 190c6bed11
commit cea32b5c92
29 changed files with 346 additions and 156 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 2643.
This is the source code for early-access 2644.
## Legal Notice

View file

@ -149,7 +149,9 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
reg_alloc.AssertNoMoreUses();
EmitAddCycles(block.CycleCount());
if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount());
}
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3();
@ -184,7 +186,9 @@ void A32EmitX64::EmitCondPrelude(const A32EmitContext& ctx) {
ASSERT(ctx.block.HasConditionFailedLocation());
Xbyak::Label pass = EmitCond(ctx.block.GetCondition());
EmitAddCycles(ctx.block.ConditionFailedCycleCount());
if (conf.enable_cycle_counting) {
EmitAddCycles(ctx.block.ConditionFailedCycleCount());
}
EmitTerminal(IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.L(pass);
}
@ -715,31 +719,37 @@ void A32EmitX64::EmitA32UpdateUpperLocationDescriptor(A32EmitContext& ctx, IR::I
}
void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
code.SwitchMxcsrOnExit();
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
ctx.reg_alloc.EndOfAllocScope();
if (conf.enable_cycle_counting) {
ctx.reg_alloc.HostCall(nullptr);
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
ctx.reg_alloc.EndOfAllocScope();
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code);
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry();
if (conf.enable_cycle_counting) {
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry();
}
}
void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
code.SwitchMxcsrOnExit();
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
ctx.reg_alloc.HostCall(nullptr);
if (conf.enable_cycle_counting) {
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
}
ctx.reg_alloc.EndOfAllocScope();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -751,10 +761,12 @@ void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(param[1], exception);
});
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry();
if (conf.enable_cycle_counting) {
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry();
}
}
static u32 GetFpscrImpl(A32JitState* jit_state) {
@ -1134,14 +1146,26 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
return;
}
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
} else {
EmitPatchJg(terminal.next);
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
patch_information[terminal.next].jz.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJz(terminal.next);
}
}
Xbyak::Label dest;
code.jmp(dest, Xbyak::CodeGenerator::T_NEAR);
@ -1206,7 +1230,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
}
void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
code.cmp(code.byte[r15 + offsetof(A32JitState, halt_requested)], u8(0));
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step);
}
@ -1222,6 +1246,17 @@ void A32EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
code.EnsurePatchLocationSize(patch_location, 14);
}
void A32EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {
code.jz(target_code_ptr);
} else {
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{target_desc}.PC());
code.jz(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 14);
}
void A32EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {

View file

@ -138,6 +138,7 @@ protected:
// Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
};

View file

@ -16,6 +16,7 @@
#include "dynarmic/backend/x64/devirtualize.h"
#include "dynarmic/backend/x64/jitstate_info.h"
#include "dynarmic/common/assert.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/scope_exit.h"
@ -31,11 +32,12 @@ namespace Dynarmic::A32 {
using namespace Backend::X64;
static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg, const A32::UserConfig& conf) {
return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)),
conf.enable_cycle_counting,
};
}
@ -58,7 +60,7 @@ static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code)
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig conf)
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code))
, conf(std::move(conf))
@ -76,7 +78,7 @@ struct Jit::Impl {
boost::icl::interval_set<u32> invalid_cache_ranges;
bool invalidate_entire_cache = false;
void Execute() {
HaltReason Execute() {
const CodePtr current_codeptr = [this] {
// RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
@ -88,11 +90,15 @@ struct Jit::Impl {
return GetCurrentBlock();
}();
block_of_code.RunCode(&jit_state, current_codeptr);
return block_of_code.RunCode(&jit_state, current_codeptr);
}
void Step() {
block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
HaltReason Step() {
return block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
}
void HaltExecution(HaltReason hr) {
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
}
void ClearExclusiveState() {
@ -123,7 +129,7 @@ struct Jit::Impl {
void RequestCacheInvalidation() {
if (jit_interface->is_executing) {
jit_state.halt_requested = true;
HaltExecution(HaltReason::CacheInvalidation);
return;
}
@ -182,28 +188,28 @@ Jit::Jit(UserConfig conf)
Jit::~Jit() = default;
void Jit::Run() {
HaltReason Jit::Run() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = false;
impl->Execute();
const HaltReason hr = impl->Execute();
impl->PerformCacheInvalidation();
return hr;
}
void Jit::Step() {
HaltReason Jit::Step() {
ASSERT(!is_executing);
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
impl->jit_state.halt_requested = true;
impl->Step();
const HaltReason hr = impl->Step();
impl->PerformCacheInvalidation();
return hr;
}
void Jit::ClearCache() {
@ -221,8 +227,8 @@ void Jit::Reset() {
impl->jit_state = {};
}
void Jit::HaltExecution() {
impl->jit_state.halt_requested = true;
void Jit::HaltExecution(HaltReason hr) {
impl->HaltExecution(hr);
}
void Jit::ClearExclusiveState() {

View file

@ -40,7 +40,7 @@ struct A32JitState {
// For internal use (See: BlockOfCode::RunCode)
u32 guest_MXCSR = 0x00001f80;
u32 asimd_MXCSR = 0x00009fc0;
bool halt_requested = false;
volatile u32 halt_reason = 0;
// Exclusive state
u32 exclusive_state = 0;

View file

@ -121,7 +121,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
reg_alloc.AssertNoMoreUses();
EmitAddCycles(block.CycleCount());
if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount());
}
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3();
@ -619,14 +621,26 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
return;
}
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
} else {
EmitPatchJg(terminal.next);
code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
patch_information[terminal.next].jz.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJz(terminal.next);
}
}
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
@ -691,7 +705,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
}
void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
code.cmp(code.byte[r15 + offsetof(A64JitState, halt_requested)], u8(0));
code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step);
}
@ -708,6 +722,18 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
code.EnsurePatchLocationSize(patch_location, 23);
}
void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {
code.jz(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.jz(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 23);
}
void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {

View file

@ -135,6 +135,7 @@ protected:
// Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
};

View file

@ -15,6 +15,7 @@
#include "dynarmic/backend/x64/devirtualize.h"
#include "dynarmic/backend/x64/jitstate_info.h"
#include "dynarmic/common/assert.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/common/scope_exit.h"
#include "dynarmic/common/x64_disassemble.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h"
@ -26,11 +27,12 @@ namespace Dynarmic::A64 {
using namespace Backend::X64;
static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg, const A64::UserConfig& conf) {
return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::AddTicks>(cb)),
std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::GetTicksRemaining>(cb)),
conf.enable_cycle_counting,
};
}
@ -55,7 +57,7 @@ struct Jit::Impl final {
public:
Impl(Jit* jit, UserConfig conf)
: conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code)) {
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
@ -63,13 +65,12 @@ public:
~Impl() = default;
void Run() {
HaltReason Run() {
ASSERT(!is_executing);
PerformRequestedCacheInvalidation();
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
jit_state.halt_requested = false;
// TODO: Check code alignment
@ -83,29 +84,33 @@ public:
return GetCurrentBlock();
}();
block_of_code.RunCode(&jit_state, current_code_ptr);
const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr);
PerformRequestedCacheInvalidation();
return hr;
}
void Step() {
HaltReason Step() {
ASSERT(!is_executing);
PerformRequestedCacheInvalidation();
is_executing = true;
SCOPE_EXIT { this->is_executing = false; };
jit_state.halt_requested = true;
block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
const HaltReason hr = block_of_code.StepCode(&jit_state, GetCurrentSingleStep());
PerformRequestedCacheInvalidation();
return hr;
}
void ClearCache() {
std::unique_lock lock{invalidation_mutex};
invalidate_entire_cache = true;
if (is_executing) {
jit_state.halt_requested = true;
HaltExecution(HaltReason::CacheInvalidation);
}
}
@ -115,7 +120,7 @@ public:
const auto range = boost::icl::discrete_interval<u64>::closed(start_address, end_address);
invalid_cache_ranges.add(range);
if (is_executing) {
jit_state.halt_requested = true;
HaltExecution(HaltReason::CacheInvalidation);
}
}
@ -124,8 +129,8 @@ public:
jit_state = {};
}
void HaltExecution() {
jit_state.halt_requested = true;
void HaltExecution(HaltReason hr) {
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
}
u64 GetSP() const {
@ -279,7 +284,7 @@ private:
void RequestCacheInvalidation() {
if (is_executing) {
jit_state.halt_requested = true;
HaltExecution(HaltReason::CacheInvalidation);
return;
}
@ -321,12 +326,12 @@ Jit::Jit(UserConfig conf)
Jit::~Jit() = default;
void Jit::Run() {
impl->Run();
HaltReason Jit::Run() {
return impl->Run();
}
void Jit::Step() {
impl->Step();
HaltReason Jit::Step() {
return impl->Step();
}
void Jit::ClearCache() {
@ -341,8 +346,8 @@ void Jit::Reset() {
impl->Reset();
}
void Jit::HaltExecution() {
impl->HaltExecution();
void Jit::HaltExecution(HaltReason hr) {
impl->HaltExecution(hr);
}
u64 Jit::GetSP() const {

View file

@ -43,7 +43,7 @@ struct A64JitState {
// For internal use (See: BlockOfCode::RunCode)
u32 guest_MXCSR = 0x00001f80;
u32 asimd_MXCSR = 0x00009fc0;
bool halt_requested = false;
volatile u32 halt_reason = 0;
// Exclusive state
static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;

View file

@ -201,12 +201,12 @@ size_t BlockOfCode::SpaceRemaining() const {
return std::min(reinterpret_cast<const u8*>(far_code_begin) - current_near_ptr, &top_[maxSize_] - current_far_ptr);
}
void BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
run_code(jit_state, code_ptr);
HaltReason BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
return run_code(jit_state, code_ptr);
}
void BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
step_code(jit_state, code_ptr);
HaltReason BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
return step_code(jit_state, code_ptr);
}
void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
@ -224,6 +224,8 @@ void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
}
void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited;
align();
run_code = getCurr<RunCodeFuncType>();
@ -236,12 +238,17 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
mov(r15, ABI_PARAM1);
mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
if (cb.enable_cycle_counting) {
cb.GetTicksRemaining->EmitCall(*this);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
}
rcp(*this);
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
SwitchMxcsrOnEntry();
jmp(rbx);
@ -252,31 +259,44 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
mov(r15, ABI_PARAM1);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1);
if (cb.enable_cycle_counting) {
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1);
}
rcp(*this);
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
lock();
or_(dword[r15 + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
SwitchMxcsrOnEntry();
jmp(ABI_PARAM2);
// Dispatcher loop
Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited;
align();
return_from_run_code[0] = getCurr<const void*>();
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller);
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller);
}
cb.LookupBlock->EmitCall(*this);
jmp(ABI_RETURN);
align();
return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller_mxcsr_already_exited);
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller_mxcsr_already_exited);
}
SwitchMxcsrOnEntry();
cb.LookupBlock->EmitCall(*this);
jmp(ABI_RETURN);
@ -291,10 +311,16 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
L(return_to_caller_mxcsr_already_exited);
cb.AddTicks->EmitCall(*this, [this](RegList param) {
mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
});
if (cb.enable_cycle_counting) {
cb.AddTicks->EmitCall(*this, [this](RegList param) {
mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
});
}
xor_(eax, eax);
lock();
xchg(dword[r15 + jsi.offsetof_halt_reason], eax);
ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
ret();
@ -323,6 +349,10 @@ void BlockOfCode::LeaveStandardASIMD() {
}
void BlockOfCode::UpdateTicks() {
if (!cb.enable_cycle_counting) {
return;
}
cb.AddTicks->EmitCall(*this, [this](RegList param) {
mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);

View file

@ -20,6 +20,7 @@
#include "dynarmic/backend/x64/jitstate_info.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/interface/halt_reason.h"
namespace Dynarmic::Backend::X64 {
@ -29,6 +30,7 @@ struct RunCodeCallbacks {
std::unique_ptr<Callback> LookupBlock;
std::unique_ptr<Callback> AddTicks;
std::unique_ptr<Callback> GetTicksRemaining;
bool enable_cycle_counting;
};
class BlockOfCode final : public Xbyak::CodeGenerator {
@ -50,9 +52,9 @@ public:
size_t SpaceRemaining() const;
/// Runs emulated code from code_ptr.
void RunCode(void* jit_state, CodePtr code_ptr) const;
HaltReason RunCode(void* jit_state, CodePtr code_ptr) const;
/// Runs emulated code from code_ptr for a single cycle.
void StepCode(void* jit_state, CodePtr code_ptr) const;
HaltReason StepCode(void* jit_state, CodePtr code_ptr) const;
/// Code emitter: Returns to dispatcher
void ReturnFromRunCode(bool mxcsr_already_exited = false);
/// Code emitter: Returns to dispatcher, forces return to host
@ -183,7 +185,7 @@ private:
CodePtr near_code_ptr;
CodePtr far_code_ptr;
using RunCodeFuncType = void (*)(void*, CodePtr);
using RunCodeFuncType = HaltReason (*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr;
RunCodeFuncType step_code = nullptr;
static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;

View file

@ -306,6 +306,11 @@ void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_co
EmitPatchJg(target_desc, target_code_ptr);
}
for (CodePtr location : patch_info.jz) {
code.SetCodePtr(location);
EmitPatchJz(target_desc, target_code_ptr);
}
for (CodePtr location : patch_info.jmp) {
code.SetCodePtr(location);
EmitPatchJmp(target_desc, target_code_ptr);

View file

@ -111,12 +111,14 @@ protected:
// Patching
struct PatchInformation {
std::vector<CodePtr> jg;
std::vector<CodePtr> jz;
std::vector<CodePtr> jmp;
std::vector<CodePtr> mov_rcx;
};
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
virtual void Unpatch(const IR::LocationDescriptor& target_desc);
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;

View file

@ -319,9 +319,9 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = true;
constexpr bool ordered = true;
if (ordered && bitsize == 128) {
if constexpr (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
@ -394,7 +394,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = true;
constexpr bool ordered = true;
const auto value = [&] {
if constexpr (bitsize == 128) {

View file

@ -20,7 +20,8 @@ struct JitStateInfo {
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
, offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv))
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) {}
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
, offsetof_halt_reason(offsetof(JitStateType, halt_reason)) {}
const size_t offsetof_guest_MXCSR;
const size_t offsetof_asimd_MXCSR;
@ -31,6 +32,7 @@ struct JitStateInfo {
const size_t offsetof_cpsr_nzcv;
const size_t offsetof_fpsr_exc;
const size_t offsetof_fpsr_qc;
const size_t offsetof_halt_reason;
};
} // namespace Dynarmic::Backend::X64

View file

@ -0,0 +1,20 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/common/common_types.h"
namespace Dynarmic::Atomic {
inline void Or(volatile u32* ptr, u32 value) {
#ifdef _MSC_VER
_InterlockedOr(reinterpret_cast<volatile long*>(ptr), value);
#else
__atomic_or_fetch(ptr, value, __ATOMIC_SEQ_CST);
#endif
}
} // namespace Dynarmic::Atomic

View file

@ -12,6 +12,7 @@
#include <vector>
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/interface/halt_reason.h"
namespace Dynarmic {
namespace A32 {
@ -27,13 +28,13 @@ public:
* Runs the emulated CPU.
* Cannot be recursively called.
*/
void Run();
HaltReason Run();
/**
* Steps the emulated CPU.
* Cannot be recursively called.
*/
void Step();
HaltReason Step();
/**
* Clears the code cache of all compiled code.
@ -58,7 +59,7 @@ public:
* Stops execution in Jit::Run.
* Can only be called from a callback.
*/
void HaltExecution();
void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
/// View and modify registers.
std::array<std::uint32_t, 16>& Regs();

View file

@ -208,6 +208,10 @@ struct UserConfig {
/// to avoid writting certain unnecessary code only needed for cycle timers.
bool wall_clock_cntpct = false;
/// This option allows you to disable cycle counting. If this is set to false,
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
bool enable_cycle_counting = true;
/// This option relates to the CPSR.E flag. Enabling this option disables modification
/// of CPSR.E by the emulated program, forcing it to 0.
/// NOTE: Calling Jit::SetCpsr with CPSR.E=1 while this option is enabled may result

View file

@ -13,6 +13,7 @@
#include <vector>
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/interface/halt_reason.h"
namespace Dynarmic {
namespace A64 {
@ -28,13 +29,13 @@ public:
* Runs the emulated CPU.
* Cannot be recursively called.
*/
void Run();
HaltReason Run();
/**
* Step the emulated CPU for one instruction.
* Cannot be recursively called.
*/
void Step();
HaltReason Step();
/**
* Clears the code cache of all compiled code.
@ -59,7 +60,7 @@ public:
* Stops execution in Jit::Run.
* Can only be called from a callback.
*/
void HaltExecution();
void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
/// Read Stack Pointer
std::uint64_t GetSP() const;
@ -118,7 +119,7 @@ public:
/// Debugging: Dump a disassembly all of compiled code to the console.
void DumpDisassembly() const;
/*
/*
* Disassemble the instructions following the current pc and return
* the resulting instructions as a vector of their string representations.
*/

View file

@ -273,9 +273,9 @@ struct UserConfig {
/// to avoid writting certain unnecessary code only needed for cycle timers.
bool wall_clock_cntpct = false;
// Determines whether AddTicks and GetTicksRemaining are called.
// If false, execution will continue until soon after Jit::HaltExecution is called.
// bool enable_ticks = true; // TODO
/// This option allows you to disable cycle counting. If this is set to false,
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
bool enable_cycle_counting = true;
// Minimum size is about 8MiB. Maximum size is about 2GiB. Maximum size is limited by
// the maximum length of a x64 jump.

View file

@ -0,0 +1,53 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstdint>
namespace Dynarmic {
enum class HaltReason : std::uint32_t {
Step = 0x00000001,
CacheInvalidation = 0x00000002,
UserDefined1 = 0x01000000,
UserDefined2 = 0x02000000,
UserDefined3 = 0x04000000,
UserDefined4 = 0x08000000,
UserDefined5 = 0x10000000,
UserDefined6 = 0x20000000,
UserDefined7 = 0x40000000,
UserDefined8 = 0x80000000,
};
constexpr HaltReason operator~(HaltReason hr) {
return static_cast<HaltReason>(~static_cast<std::uint32_t>(hr));
}
constexpr HaltReason operator|(HaltReason hr1, HaltReason hr2) {
return static_cast<HaltReason>(static_cast<std::uint32_t>(hr1) | static_cast<std::uint32_t>(hr2));
}
constexpr HaltReason operator&(HaltReason hr1, HaltReason hr2) {
return static_cast<HaltReason>(static_cast<std::uint32_t>(hr1) & static_cast<std::uint32_t>(hr2));
}
constexpr HaltReason operator|=(HaltReason& result, HaltReason hr) {
return result = (result | hr);
}
constexpr HaltReason operator&=(HaltReason& result, HaltReason hr) {
return result = (result & hr);
}
constexpr bool operator!(HaltReason hr) {
return static_cast<std::uint32_t>(hr) == 0;
}
constexpr bool Has(HaltReason hr1, HaltReason hr2) {
return (static_cast<std::uint32_t>(hr1) & static_cast<std::uint32_t>(hr2)) != 0;
}
} // namespace Dynarmic

View file

@ -171,6 +171,9 @@ public:
/// Prepare core for thread reschedule (if needed to correctly handle state)
virtual void PrepareReschedule() = 0;
/// Signal an interrupt and ask the core to halt as soon as possible.
virtual void SignalInterrupt() = 0;
struct BacktraceEntry {
std::string module;
u64 address;

View file

@ -25,6 +25,9 @@ namespace Core {
using namespace Common::Literals;
constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
public:
explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
@ -82,15 +85,13 @@ public:
}
void CallSVC(u32 swi) override {
parent.svc_called = true;
parent.svc_swi = swi;
parent.jit->HaltExecution();
parent.jit->HaltExecution(svc_call);
}
void AddTicks(u64 ticks) override {
if (parent.uses_wall_clock) {
return;
}
ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off
// if not all cores are doing a similar amount of work. Instead of doing this, we should
@ -106,12 +107,8 @@ public:
}
u64 GetTicksRemaining() override {
if (parent.uses_wall_clock) {
if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
return minimum_run_cycles;
}
return 0U;
}
ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
}
@ -146,6 +143,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
// Timing
config.wall_clock_cntpct = uses_wall_clock;
config.enable_cycle_counting = !uses_wall_clock;
// Code cache size
config.code_cache_size = 512_MiB;
@ -228,13 +226,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
void ARM_Dynarmic_32::Run() {
while (true) {
jit->Run();
if (!svc_called) {
break;
const auto hr = jit->Run();
if (Has(hr, svc_call)) {
Kernel::Svc::Call(system, svc_swi);
}
svc_called = false;
Kernel::Svc::Call(system, svc_swi);
if (shutdown) {
if (Has(hr, break_loop)) {
break;
}
}
@ -320,8 +316,11 @@ void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
}
void ARM_Dynarmic_32::PrepareReschedule() {
jit->HaltExecution();
shutdown = true;
jit->HaltExecution(break_loop);
}
void ARM_Dynarmic_32::SignalInterrupt() {
jit->HaltExecution(break_loop);
}
void ARM_Dynarmic_32::ClearInstructionCache() {

View file

@ -57,6 +57,7 @@ public:
void LoadContext(const ThreadContext64& ctx) override {}
void PrepareReschedule() override;
void SignalInterrupt() override;
void ClearExclusiveState() override;
void ClearInstructionCache() override;
@ -83,9 +84,6 @@ private:
// SVC callback
u32 svc_swi{};
bool svc_called{};
bool shutdown{};
};
} // namespace Core

View file

@ -26,6 +26,9 @@ namespace Core {
using Vector = Dynarmic::A64::Vector;
using namespace Common::Literals;
constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
public:
explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_)
@ -105,7 +108,7 @@ public:
break;
}
parent.jit->HaltExecution();
parent.jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation);
}
void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
@ -124,15 +127,12 @@ public:
}
void CallSVC(u32 swi) override {
parent.svc_called = true;
parent.svc_swi = swi;
parent.jit->HaltExecution();
parent.jit->HaltExecution(svc_call);
}
void AddTicks(u64 ticks) override {
if (parent.uses_wall_clock) {
return;
}
ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off
@ -147,12 +147,8 @@ public:
}
u64 GetTicksRemaining() override {
if (parent.uses_wall_clock) {
if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
return minimum_run_cycles;
}
return 0U;
}
ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
}
@ -208,6 +204,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
// Timing
config.wall_clock_cntpct = uses_wall_clock;
config.enable_cycle_counting = !uses_wall_clock;
// Code cache size
config.code_cache_size = 512_MiB;
@ -290,13 +287,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
void ARM_Dynarmic_64::Run() {
while (true) {
jit->Run();
if (!svc_called) {
break;
const auto hr = jit->Run();
if (Has(hr, svc_call)) {
Kernel::Svc::Call(system, svc_swi);
}
svc_called = false;
Kernel::Svc::Call(system, svc_swi);
if (shutdown) {
if (Has(hr, break_loop)) {
break;
}
}
@ -387,8 +382,11 @@ void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) {
}
void ARM_Dynarmic_64::PrepareReschedule() {
jit->HaltExecution();
shutdown = true;
jit->HaltExecution(break_loop);
}
void ARM_Dynarmic_64::SignalInterrupt() {
jit->HaltExecution(break_loop);
}
void ARM_Dynarmic_64::ClearInstructionCache() {

View file

@ -51,6 +51,7 @@ public:
void LoadContext(const ThreadContext64& ctx) override;
void PrepareReschedule() override;
void SignalInterrupt() override;
void ClearExclusiveState() override;
void ClearInstructionCache() override;
@ -77,9 +78,6 @@ private:
// SVC callback
u32 svc_swi{};
bool svc_called{};
bool shutdown{};
};
} // namespace Core

View file

@ -58,6 +58,7 @@ bool PhysicalCore::IsInterrupted() const {
void PhysicalCore::Interrupt() {
guard->lock();
interrupts[core_index].SetInterrupt(true);
arm_interface->SignalInterrupt();
guard->unlock();
}

View file

@ -105,7 +105,7 @@ struct ImageDescriptor {
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
struct Info {
static constexpr size_t MAX_INDIRECT_CBUFS{15};
static constexpr size_t MAX_INDIRECT_CBUFS{14};
static constexpr size_t MAX_CBUFS{18};
static constexpr size_t MAX_SSBOS{32};

View file

@ -561,7 +561,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
const std::span<const u8> src = input.subspan(host_offset);
gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
num_tiles.depth, block.height, block.depth);