early-access version 2545

This commit is contained in:
pineappleEA 2022-03-13 10:13:48 +01:00
parent 6d9b02c048
commit b98acbedfe
35 changed files with 382 additions and 239 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 2543. This is the source code for early-access 2545.
## Legal Notice ## Legal Notice

View file

@ -98,7 +98,7 @@ add_library(dynarmic
ir/type.h ir/type.h
ir/value.cpp ir/value.cpp
ir/value.h ir/value.h
) ir/access_type.h)
if ("A32" IN_LIST DYNARMIC_FRONTENDS) if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE target_sources(dynarmic PRIVATE

View file

@ -85,6 +85,7 @@ void A32EmitX64::GenFastmemFallbacks() {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
} }
} }
code.ZeroExtendFrom(bitsize, code.ABI_PARAM3);
callback.EmitCall(code); callback.EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStack(code); ABI_PopCallerSaveRegistersAndAdjustStack(code);
code.ret(); code.ret();
@ -110,7 +111,9 @@ void A32EmitX64::GenFastmemFallbacks() {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
} }
} }
code.ZeroExtendFrom(bitsize, code.ABI_PARAM3);
code.mov(code.ABI_PARAM4, rax); code.mov(code.ABI_PARAM4, rax);
code.ZeroExtendFrom(bitsize, code.ABI_PARAM4);
callback.EmitCall(code); callback.EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLoc::RAX); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLoc::RAX);
code.ret(); code.ret();

View file

@ -22,6 +22,7 @@
#include "dynarmic/common/spin_lock_x64.h" #include "dynarmic/common/spin_lock_x64.h"
#include "dynarmic/common/x64_disassemble.h" #include "dynarmic/common/x64_disassemble.h"
#include "dynarmic/interface/exclusive_monitor.h" #include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/access_type.h"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
@ -227,6 +228,7 @@ void A64EmitX64::GenFastmemFallbacks() {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
} }
} }
code.ZeroExtendFrom(bitsize, code.ABI_PARAM3);
callback.EmitCall(code); callback.EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStack(code); ABI_PopCallerSaveRegistersAndAdjustStack(code);
code.ret(); code.ret();
@ -252,7 +254,9 @@ void A64EmitX64::GenFastmemFallbacks() {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
} }
} }
code.ZeroExtendFrom(bitsize, code.ABI_PARAM3);
code.mov(code.ABI_PARAM4, rax); code.mov(code.ABI_PARAM4, rax);
code.ZeroExtendFrom(bitsize, code.ABI_PARAM4);
callback.EmitCall(code); callback.EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLoc::RAX); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLoc::RAX);
code.ret(); code.ret();
@ -438,45 +442,102 @@ Xbyak::RegExp EmitFastmemVAddr(BlockOfCode& code, A64EmitContext& ctx, Xbyak::La
} }
template<std::size_t bitsize> template<std::size_t bitsize>
void EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr) { const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr, bool ordered) {
if (ordered) {
if constexpr (bitsize == 128) {
code.mfence();
} else {
code.xor_(Xbyak::Reg32{value_idx}, Xbyak::Reg32{value_idx});
}
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.lock();
code.xadd(code.byte[addr], Xbyak::Reg32{value_idx}.cvt8());
return fastmem_location;
case 16:
code.lock();
code.xadd(word[addr], Xbyak::Reg32{value_idx});
return fastmem_location;
case 32:
code.lock();
code.xadd(dword[addr], Xbyak::Reg32{value_idx});
return fastmem_location;
case 64:
code.lock();
code.xadd(qword[addr], Xbyak::Reg64{value_idx});
return fastmem_location;
case 128:
code.movaps(Xbyak::Xmm{value_idx}, xword[addr]);
return fastmem_location;
default:
ASSERT_FALSE("Invalid bitsize");
}
}
const void* fastmem_location = code.getCurr();
switch (bitsize) { switch (bitsize) {
case 8: case 8:
code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]); code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]);
return; return fastmem_location;
case 16: case 16:
code.movzx(Xbyak::Reg32{value_idx}, word[addr]); code.movzx(Xbyak::Reg32{value_idx}, word[addr]);
return; return fastmem_location;
case 32: case 32:
code.mov(Xbyak::Reg32{value_idx}, dword[addr]); code.mov(Xbyak::Reg32{value_idx}, dword[addr]);
return; return fastmem_location;
case 64: case 64:
code.mov(Xbyak::Reg64{value_idx}, qword[addr]); code.mov(Xbyak::Reg64{value_idx}, qword[addr]);
return; return fastmem_location;
case 128: case 128:
code.movups(Xbyak::Xmm{value_idx}, xword[addr]); code.movups(Xbyak::Xmm{value_idx}, xword[addr]);
return; return fastmem_location;
default: default:
ASSERT_FALSE("Invalid bitsize"); ASSERT_FALSE("Invalid bitsize");
} }
} }
template<std::size_t bitsize> template<std::size_t bitsize>
void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int value_idx) { void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int value_idx, bool ordered) {
switch (bitsize) { switch (bitsize) {
case 8: case 8:
code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8()); if (ordered) {
code.xchg(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8());
} else {
code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8());
}
return; return;
case 16: case 16:
code.mov(word[addr], Xbyak::Reg16{value_idx}); if (ordered) {
code.xchg(word[addr], Xbyak::Reg16{value_idx});
} else {
code.mov(word[addr], Xbyak::Reg16{value_idx});
}
return; return;
case 32: case 32:
code.mov(dword[addr], Xbyak::Reg32{value_idx}); if (ordered) {
code.xchg(dword[addr], Xbyak::Reg32{value_idx});
} else {
code.mov(dword[addr], Xbyak::Reg32{value_idx});
}
return; return;
case 64: case 64:
code.mov(qword[addr], Xbyak::Reg64{value_idx}); if (ordered) {
code.xchg(qword[addr], Xbyak::Reg64{value_idx});
} else {
code.mov(qword[addr], Xbyak::Reg64{value_idx});
}
return; return;
case 128: case 128:
code.movups(xword[addr], Xbyak::Xmm{value_idx}); if (ordered) {
code.movaps(xword[addr], Xbyak::Xmm{value_idx});
code.mfence();
} else {
code.movups(xword[addr], Xbyak::Xmm{value_idx});
}
return; return;
default: default:
ASSERT_FALSE("Invalid bitsize"); ASSERT_FALSE("Invalid bitsize");
@ -488,16 +549,24 @@ void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int value_
template<std::size_t bitsize, auto callback> template<std::size_t bitsize, auto callback>
void A64EmitX64::EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const IR::AccessType acctype = args[2].GetImmediateAccType();
const bool ordered = acctype == IR::AccessType::ORDERED || acctype == IR::AccessType::ORDEREDRW || acctype == IR::AccessType::LIMITEDORDERED;
const auto fastmem_marker = ShouldFastmem(ctx, inst); const auto fastmem_marker = ShouldFastmem(ctx, inst);
if (!conf.page_table && !fastmem_marker) { if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks // Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) { if constexpr (bitsize == 128) {
ctx.reg_alloc.HostCall(nullptr, {}, args[0]); ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
if (ordered) {
code.mfence();
}
code.CallFunction(memory_read_128); code.CallFunction(memory_read_128);
ctx.reg_alloc.DefineValue(inst, xmm1); ctx.reg_alloc.DefineValue(inst, xmm1);
} else { } else {
ctx.reg_alloc.HostCall(inst, {}, args[0]); ctx.reg_alloc.HostCall(inst, {}, args[0]);
if (ordered) {
code.mfence();
}
Devirtualize<callback>(conf.callbacks).EmitCall(code); Devirtualize<callback>(conf.callbacks).EmitCall(code);
code.ZeroExtendFrom(bitsize, code.ABI_RETURN); code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
} }
@ -516,8 +585,7 @@ void A64EmitX64::EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
// Use fastmem // Use fastmem
const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
const auto location = code.getCurr(); const auto location = EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr);
fastmem_patch_info.emplace( fastmem_patch_info.emplace(
Common::BitCast<u64>(location), Common::BitCast<u64>(location),
@ -532,13 +600,16 @@ void A64EmitX64::EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
ASSERT(conf.page_table); ASSERT(conf.page_table);
const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
require_abort_handling = true; require_abort_handling = true;
EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr); EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
} }
code.L(end); code.L(end);
if (require_abort_handling) { if (require_abort_handling) {
code.SwitchToFarCode(); code.SwitchToFarCode();
code.L(abort); code.L(abort);
if (ordered) {
code.mfence();
}
code.call(wrapped_fn); code.call(wrapped_fn);
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
code.SwitchToNearCode(); code.SwitchToNearCode();
@ -554,6 +625,8 @@ void A64EmitX64::EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
template<std::size_t bitsize, auto callback> template<std::size_t bitsize, auto callback>
void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const IR::AccessType acctype = args[2].GetImmediateAccType();
const bool ordered = acctype == IR::AccessType::ORDERED || acctype == IR::AccessType::ORDEREDRW || acctype == IR::AccessType::LIMITEDORDERED;
const auto fastmem_marker = ShouldFastmem(ctx, inst); const auto fastmem_marker = ShouldFastmem(ctx, inst);
if (!conf.page_table && !fastmem_marker) { if (!conf.page_table && !fastmem_marker) {
@ -568,11 +641,16 @@ void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
Devirtualize<callback>(conf.callbacks).EmitCall(code); Devirtualize<callback>(conf.callbacks).EmitCall(code);
} }
if (ordered) {
code.mfence();
}
return; return;
} }
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.UseXmm(args[1]).getIdx() : ctx.reg_alloc.UseGpr(args[1]).getIdx(); const int value_idx = bitsize == 128
? ctx.reg_alloc.UseXmm(args[1]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(args[1]).getIdx() : ctx.reg_alloc.UseGpr(args[1]).getIdx());
const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]; const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)];
@ -584,7 +662,7 @@ void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) {
const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
const auto location = code.getCurr(); const auto location = code.getCurr();
EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx); EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx, ordered);
fastmem_patch_info.emplace( fastmem_patch_info.emplace(
Common::BitCast<u64>(location), Common::BitCast<u64>(location),
@ -599,7 +677,7 @@ void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) {
ASSERT(conf.page_table); ASSERT(conf.page_table);
const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
require_abort_handling = true; require_abort_handling = true;
EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx); EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx, ordered);
} }
code.L(end); code.L(end);
@ -607,6 +685,9 @@ void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) {
code.SwitchToFarCode(); code.SwitchToFarCode();
code.L(abort); code.L(abort);
code.call(wrapped_fn); code.call(wrapped_fn);
if (ordered) {
code.mfence();
}
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
code.SwitchToNearCode(); code.SwitchToNearCode();
} }
@ -656,6 +737,8 @@ template<std::size_t bitsize, auto callback>
void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
ASSERT(conf.global_monitor != nullptr); ASSERT(conf.global_monitor != nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const IR::AccessType acctype = args[2].GetImmediateAccType();
const bool ordered = acctype == IR::AccessType::ORDERED || acctype == IR::AccessType::ORDEREDRW || acctype == IR::AccessType::LIMITEDORDERED;
if constexpr (bitsize != 128) { if constexpr (bitsize != 128) {
using T = mp::unsigned_integer_of_size<bitsize>; using T = mp::unsigned_integer_of_size<bitsize>;
@ -664,6 +747,9 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
if (ordered) {
code.mfence();
}
code.CallLambda( code.CallLambda(
[](A64::UserConfig& conf, u64 vaddr) -> T { [](A64::UserConfig& conf, u64 vaddr) -> T {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T { return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
@ -681,6 +767,9 @@ void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) {
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
if (ordered) {
code.mfence();
}
code.CallLambda( code.CallLambda(
[](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) { [](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) {
ret = conf.global_monitor->ReadAndMark<A64::Vector>(conf.processor_id, vaddr, [&]() -> A64::Vector { ret = conf.global_monitor->ReadAndMark<A64::Vector>(conf.processor_id, vaddr, [&]() -> A64::Vector {
@ -698,6 +787,8 @@ template<std::size_t bitsize, auto callback>
void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
ASSERT(conf.global_monitor != nullptr); ASSERT(conf.global_monitor != nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const IR::AccessType acctype = args[3].GetImmediateAccType();
const bool ordered = acctype == IR::AccessType::ORDERED || acctype == IR::AccessType::ORDEREDRW || acctype == IR::AccessType::LIMITEDORDERED;
if constexpr (bitsize != 128) { if constexpr (bitsize != 128) {
ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]);
@ -727,6 +818,9 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
? 0 ? 0
: 1; : 1;
}); });
if (ordered) {
code.mfence();
}
} else { } else {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
@ -740,6 +834,9 @@ void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) {
? 0 ? 0
: 1; : 1;
}); });
if (ordered) {
code.mfence();
}
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
} }
code.L(end); code.L(end);
@ -754,6 +851,8 @@ void A64EmitX64::EmitExclusiveReadMemoryInline(A64EmitContext& ctx, IR::Inst* in
} }
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const IR::AccessType acctype = args[2].GetImmediateAccType();
const bool ordered = acctype == IR::AccessType::ORDERED || acctype == IR::AccessType::ORDEREDRW || acctype == IR::AccessType::LIMITEDORDERED;
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx(); const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
@ -776,7 +875,7 @@ void A64EmitX64::EmitExclusiveReadMemoryInline(A64EmitContext& ctx, IR::Inst* in
const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
const auto location = code.getCurr(); const auto location = code.getCurr();
EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr); EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
fastmem_patch_info.emplace( fastmem_patch_info.emplace(
Common::BitCast<u64>(location), Common::BitCast<u64>(location),
@ -801,7 +900,7 @@ void A64EmitX64::EmitExclusiveReadMemoryInline(A64EmitContext& ctx, IR::Inst* in
} }
code.mov(tmp, Common::BitCast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); code.mov(tmp, Common::BitCast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id)));
EmitWriteMemoryMov<bitsize>(code, tmp, value_idx); EmitWriteMemoryMov<bitsize>(code, tmp, value_idx, false);
EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32()); EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32());
@ -821,6 +920,8 @@ void A64EmitX64::EmitExclusiveWriteMemoryInline(A64EmitContext& ctx, IR::Inst* i
} }
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const IR::AccessType acctype = args[3].GetImmediateAccType();
const bool ordered = acctype == IR::AccessType::ORDERED || acctype == IR::AccessType::ORDEREDRW || acctype == IR::AccessType::LIMITEDORDERED;
const auto value = [&] { const auto value = [&] {
if constexpr (bitsize == 128) { if constexpr (bitsize == 128) {
@ -869,7 +970,7 @@ void A64EmitX64::EmitExclusiveWriteMemoryInline(A64EmitContext& ctx, IR::Inst* i
code.movq(rcx, xmm0); code.movq(rcx, xmm0);
} }
} else { } else {
EmitReadMemoryMov<bitsize>(code, rax.getIdx(), tmp); EmitReadMemoryMov<bitsize>(code, rax.getIdx(), tmp, false);
} }
const auto fastmem_marker = ShouldFastmem(ctx, inst); const auto fastmem_marker = ShouldFastmem(ctx, inst);
@ -907,6 +1008,10 @@ void A64EmitX64::EmitExclusiveWriteMemoryInline(A64EmitContext& ctx, IR::Inst* i
} }
} }
if (ordered) {
code.mfence();
}
code.setnz(status.cvt8()); code.setnz(status.cvt8());
code.SwitchToFarCode(); code.SwitchToFarCode();
@ -922,6 +1027,10 @@ void A64EmitX64::EmitExclusiveWriteMemoryInline(A64EmitContext& ctx, IR::Inst* i
conf.recompile_on_exclusive_fastmem_failure, conf.recompile_on_exclusive_fastmem_failure,
}); });
if (ordered) {
code.mfence();
}
code.cmp(al, 0); code.cmp(al, 0);
code.setz(status.cvt8()); code.setz(status.cvt8());
code.movzx(status.cvt32(), status.cvt8()); code.movzx(status.cvt32(), status.cvt8());
@ -929,6 +1038,9 @@ void A64EmitX64::EmitExclusiveWriteMemoryInline(A64EmitContext& ctx, IR::Inst* i
code.SwitchToNearCode(); code.SwitchToNearCode();
} else { } else {
code.call(fallback_fn); code.call(fallback_fn);
if (ordered) {
code.mfence();
}
code.cmp(al, 0); code.cmp(al, 0);
code.setz(status.cvt8()); code.setz(status.cvt8());
code.movzx(status.cvt32(), status.cvt8()); code.movzx(status.cvt32(), status.cvt8());

View file

@ -207,6 +207,11 @@ IR::Cond Argument::GetImmediateCond() const {
return value.GetCond(); return value.GetCond();
} }
IR::AccessType Argument::GetImmediateAccType() const {
ASSERT(IsImmediate() && GetType() == IR::Type::AccessType);
return value.GetAccType();
}
bool Argument::IsInGpr() const { bool Argument::IsInGpr() const {
if (IsImmediate()) if (IsImmediate())
return false; return false;
@ -410,7 +415,7 @@ void RegAlloc::HostCall(IR::Inst* result_def,
for (size_t i = 0; i < args_count; i++) { for (size_t i = 0; i < args_count; i++) {
if (args[i] && !args[i]->get().IsVoid()) { if (args[i] && !args[i]->get().IsVoid()) {
UseScratch(*args[i], args_hostloc[i]); UseScratch(*args[i], args_hostloc[i]);
#if defined(__llvm__) && !defined(_WIN32)
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
switch (args[i]->get().GetType()) { switch (args[i]->get().GetType()) {
@ -420,10 +425,12 @@ void RegAlloc::HostCall(IR::Inst* result_def,
case IR::Type::U16: case IR::Type::U16:
code.movzx(reg.cvt32(), reg.cvt16()); code.movzx(reg.cvt32(), reg.cvt16());
break; break;
case IR::Type::U32:
code.mov(reg.cvt32(), reg.cvt32());
break;
default: default:
break; // Nothing needs to be done break; // Nothing needs to be done
} }
#endif
} }
} }

View file

@ -75,6 +75,7 @@ public:
u64 GetImmediateS32() const; u64 GetImmediateS32() const;
u64 GetImmediateU64() const; u64 GetImmediateU64() const;
IR::Cond GetImmediateCond() const; IR::Cond GetImmediateCond() const;
IR::AccessType GetImmediateAccType() const;
/// Is this value currently in a GPR? /// Is this value currently in a GPR?
bool IsInGpr() const; bool IsInGpr() const;

View file

@ -60,7 +60,7 @@ bool TranslatorVisitor::arm_LDA(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ReadMemory32(address)); // AccType::Ordered ir.SetRegister(t, ir.ReadMemory32(address)); // AccessType::Ordered
return true; return true;
} }
// LDAB<c> <Rt>, [<Rn>] // LDAB<c> <Rt>, [<Rn>]
@ -74,7 +74,7 @@ bool TranslatorVisitor::arm_LDAB(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory8(address))); // AccType::Ordered ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory8(address))); // AccessType::Ordered
return true; return true;
} }
// LDAH<c> <Rt>, [<Rn>] // LDAH<c> <Rt>, [<Rn>]
@ -88,7 +88,7 @@ bool TranslatorVisitor::arm_LDAH(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory16(address))); // AccType::Ordered ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory16(address))); // AccessType::Ordered
return true; return true;
} }
@ -103,7 +103,7 @@ bool TranslatorVisitor::arm_LDAEX(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ExclusiveReadMemory32(address)); // AccType::Ordered ir.SetRegister(t, ir.ExclusiveReadMemory32(address)); // AccessType::Ordered
return true; return true;
} }
@ -118,7 +118,7 @@ bool TranslatorVisitor::arm_LDAEXB(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address))); // AccType::Ordered ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address))); // AccessType::Ordered
return true; return true;
} }
@ -133,7 +133,7 @@ bool TranslatorVisitor::arm_LDAEXD(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
const auto [lo, hi] = ir.ExclusiveReadMemory64(address); // AccType::Ordered const auto [lo, hi] = ir.ExclusiveReadMemory64(address); // AccessType::Ordered
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR // DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
ir.SetRegister(t, lo); ir.SetRegister(t, lo);
ir.SetRegister(t + 1, hi); ir.SetRegister(t + 1, hi);
@ -151,7 +151,7 @@ bool TranslatorVisitor::arm_LDAEXH(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address))); // AccType::Ordered ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address))); // AccessType::Ordered
return true; return true;
} }
@ -166,7 +166,7 @@ bool TranslatorVisitor::arm_STL(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.WriteMemory32(address, ir.GetRegister(t)); // AccType::Ordered ir.WriteMemory32(address, ir.GetRegister(t)); // AccessType::Ordered
return true; return true;
} }
@ -181,7 +181,7 @@ bool TranslatorVisitor::arm_STLB(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t))); // AccType::Ordered ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t))); // AccessType::Ordered
return true; return true;
} }
@ -196,7 +196,7 @@ bool TranslatorVisitor::arm_STLH(Cond cond, Reg n, Reg t) {
} }
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t))); // AccType::Ordered ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t))); // AccessType::Ordered
return true; return true;
} }
@ -216,7 +216,7 @@ bool TranslatorVisitor::arm_STLEXB(Cond cond, Reg n, Reg d, Reg t) {
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
const auto value = ir.LeastSignificantByte(ir.GetRegister(t)); const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
const auto passed = ir.ExclusiveWriteMemory8(address, value); // AccType::Ordered const auto passed = ir.ExclusiveWriteMemory8(address, value); // AccessType::Ordered
ir.SetRegister(d, passed); ir.SetRegister(d, passed);
return true; return true;
} }
@ -238,7 +238,7 @@ bool TranslatorVisitor::arm_STLEXD(Cond cond, Reg n, Reg d, Reg t) {
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
const auto value_lo = ir.GetRegister(t); const auto value_lo = ir.GetRegister(t);
const auto value_hi = ir.GetRegister(t2); const auto value_hi = ir.GetRegister(t2);
const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi); // AccType::Ordered const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi); // AccessType::Ordered
ir.SetRegister(d, passed); ir.SetRegister(d, passed);
return true; return true;
} }
@ -259,7 +259,7 @@ bool TranslatorVisitor::arm_STLEXH(Cond cond, Reg n, Reg d, Reg t) {
const auto address = ir.GetRegister(n); const auto address = ir.GetRegister(n);
const auto value = ir.LeastSignificantHalf(ir.GetRegister(t)); const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
const auto passed = ir.ExclusiveWriteMemory16(address, value); // AccType::Ordered const auto passed = ir.ExclusiveWriteMemory16(address, value); // AccessType::Ordered
ir.SetRegister(d, passed); ir.SetRegister(d, passed);
return true; return true;
} }

View file

@ -105,84 +105,84 @@ void IREmitter::ClearExclusive() {
Inst(Opcode::A64ClearExclusive); Inst(Opcode::A64ClearExclusive);
} }
IR::U8 IREmitter::ReadMemory8(const IR::U64& vaddr) { IR::U8 IREmitter::ReadMemory8(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U8>(Opcode::A64ReadMemory8, vaddr); return Inst<IR::U8>(Opcode::A64ReadMemory8, vaddr, IR::Value(acctype));
} }
IR::U16 IREmitter::ReadMemory16(const IR::U64& vaddr) { IR::U16 IREmitter::ReadMemory16(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U16>(Opcode::A64ReadMemory16, vaddr); return Inst<IR::U16>(Opcode::A64ReadMemory16, vaddr, IR::Value(acctype));
} }
IR::U32 IREmitter::ReadMemory32(const IR::U64& vaddr) { IR::U32 IREmitter::ReadMemory32(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U32>(Opcode::A64ReadMemory32, vaddr); return Inst<IR::U32>(Opcode::A64ReadMemory32, vaddr, IR::Value(acctype));
} }
IR::U64 IREmitter::ReadMemory64(const IR::U64& vaddr) { IR::U64 IREmitter::ReadMemory64(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U64>(Opcode::A64ReadMemory64, vaddr); return Inst<IR::U64>(Opcode::A64ReadMemory64, vaddr, IR::Value(acctype));
} }
IR::U128 IREmitter::ReadMemory128(const IR::U64& vaddr) { IR::U128 IREmitter::ReadMemory128(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U128>(Opcode::A64ReadMemory128, vaddr); return Inst<IR::U128>(Opcode::A64ReadMemory128, vaddr, IR::Value(acctype));
} }
IR::U8 IREmitter::ExclusiveReadMemory8(const IR::U64& vaddr) { IR::U8 IREmitter::ExclusiveReadMemory8(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U8>(Opcode::A64ExclusiveReadMemory8, vaddr); return Inst<IR::U8>(Opcode::A64ExclusiveReadMemory8, vaddr, IR::Value(acctype));
} }
IR::U16 IREmitter::ExclusiveReadMemory16(const IR::U64& vaddr) { IR::U16 IREmitter::ExclusiveReadMemory16(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U16>(Opcode::A64ExclusiveReadMemory16, vaddr); return Inst<IR::U16>(Opcode::A64ExclusiveReadMemory16, vaddr, IR::Value(acctype));
} }
IR::U32 IREmitter::ExclusiveReadMemory32(const IR::U64& vaddr) { IR::U32 IREmitter::ExclusiveReadMemory32(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U32>(Opcode::A64ExclusiveReadMemory32, vaddr); return Inst<IR::U32>(Opcode::A64ExclusiveReadMemory32, vaddr, IR::Value(acctype));
} }
IR::U64 IREmitter::ExclusiveReadMemory64(const IR::U64& vaddr) { IR::U64 IREmitter::ExclusiveReadMemory64(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U64>(Opcode::A64ExclusiveReadMemory64, vaddr); return Inst<IR::U64>(Opcode::A64ExclusiveReadMemory64, vaddr, IR::Value(acctype));
} }
IR::U128 IREmitter::ExclusiveReadMemory128(const IR::U64& vaddr) { IR::U128 IREmitter::ExclusiveReadMemory128(const IR::U64& vaddr, IR::AccessType acctype) {
return Inst<IR::U128>(Opcode::A64ExclusiveReadMemory128, vaddr); return Inst<IR::U128>(Opcode::A64ExclusiveReadMemory128, vaddr, IR::Value(acctype));
} }
void IREmitter::WriteMemory8(const IR::U64& vaddr, const IR::U8& value) { void IREmitter::WriteMemory8(const IR::U64& vaddr, const IR::U8& value, IR::AccessType acctype) {
Inst(Opcode::A64WriteMemory8, vaddr, value); Inst(Opcode::A64WriteMemory8, vaddr, value, IR::Value(acctype));
} }
void IREmitter::WriteMemory16(const IR::U64& vaddr, const IR::U16& value) { void IREmitter::WriteMemory16(const IR::U64& vaddr, const IR::U16& value, IR::AccessType acctype) {
Inst(Opcode::A64WriteMemory16, vaddr, value); Inst(Opcode::A64WriteMemory16, vaddr, value, IR::Value(acctype));
} }
void IREmitter::WriteMemory32(const IR::U64& vaddr, const IR::U32& value) { void IREmitter::WriteMemory32(const IR::U64& vaddr, const IR::U32& value, IR::AccessType acctype) {
Inst(Opcode::A64WriteMemory32, vaddr, value); Inst(Opcode::A64WriteMemory32, vaddr, value, IR::Value(acctype));
} }
void IREmitter::WriteMemory64(const IR::U64& vaddr, const IR::U64& value) { void IREmitter::WriteMemory64(const IR::U64& vaddr, const IR::U64& value, IR::AccessType acctype) {
Inst(Opcode::A64WriteMemory64, vaddr, value); Inst(Opcode::A64WriteMemory64, vaddr, value, IR::Value(acctype));
} }
void IREmitter::WriteMemory128(const IR::U64& vaddr, const IR::U128& value) { void IREmitter::WriteMemory128(const IR::U64& vaddr, const IR::U128& value, IR::AccessType acctype) {
Inst(Opcode::A64WriteMemory128, vaddr, value); Inst(Opcode::A64WriteMemory128, vaddr, value, IR::Value(acctype));
} }
IR::U32 IREmitter::ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value) { IR::U32 IREmitter::ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value, IR::AccessType acctype) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory8, vaddr, value); return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory8, vaddr, value, IR::Value(acctype));
} }
IR::U32 IREmitter::ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value) { IR::U32 IREmitter::ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value, IR::AccessType acctype) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory16, vaddr, value); return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory16, vaddr, value, IR::Value(acctype));
} }
IR::U32 IREmitter::ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value) { IR::U32 IREmitter::ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value, IR::AccessType acctype) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory32, vaddr, value); return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory32, vaddr, value, IR::Value(acctype));
} }
IR::U32 IREmitter::ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value) { IR::U32 IREmitter::ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value, IR::AccessType acctype) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory64, vaddr, value); return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory64, vaddr, value, IR::Value(acctype));
} }
IR::U32 IREmitter::ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value) { IR::U32 IREmitter::ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value, IR::AccessType acctype) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory128, vaddr, value); return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory128, vaddr, value, IR::Value(acctype));
} }
IR::U32 IREmitter::GetW(Reg reg) { IR::U32 IREmitter::GetW(Reg reg) {

View file

@ -56,26 +56,26 @@ public:
void SetTPIDR(const IR::U64& value); void SetTPIDR(const IR::U64& value);
void ClearExclusive(); void ClearExclusive();
IR::U8 ReadMemory8(const IR::U64& vaddr); IR::U8 ReadMemory8(const IR::U64& vaddr, IR::AccessType acctype);
IR::U16 ReadMemory16(const IR::U64& vaddr); IR::U16 ReadMemory16(const IR::U64& vaddr, IR::AccessType acctype);
IR::U32 ReadMemory32(const IR::U64& vaddr); IR::U32 ReadMemory32(const IR::U64& vaddr, IR::AccessType acctype);
IR::U64 ReadMemory64(const IR::U64& vaddr); IR::U64 ReadMemory64(const IR::U64& vaddr, IR::AccessType acctype);
IR::U128 ReadMemory128(const IR::U64& vaddr); IR::U128 ReadMemory128(const IR::U64& vaddr, IR::AccessType acctype);
IR::U8 ExclusiveReadMemory8(const IR::U64& vaddr); IR::U8 ExclusiveReadMemory8(const IR::U64& vaddr, IR::AccessType acctype);
IR::U16 ExclusiveReadMemory16(const IR::U64& vaddr); IR::U16 ExclusiveReadMemory16(const IR::U64& vaddr, IR::AccessType acctype);
IR::U32 ExclusiveReadMemory32(const IR::U64& vaddr); IR::U32 ExclusiveReadMemory32(const IR::U64& vaddr, IR::AccessType acctype);
IR::U64 ExclusiveReadMemory64(const IR::U64& vaddr); IR::U64 ExclusiveReadMemory64(const IR::U64& vaddr, IR::AccessType acctype);
IR::U128 ExclusiveReadMemory128(const IR::U64& vaddr); IR::U128 ExclusiveReadMemory128(const IR::U64& vaddr, IR::AccessType acctype);
void WriteMemory8(const IR::U64& vaddr, const IR::U8& value); void WriteMemory8(const IR::U64& vaddr, const IR::U8& value, IR::AccessType acctype);
void WriteMemory16(const IR::U64& vaddr, const IR::U16& value); void WriteMemory16(const IR::U64& vaddr, const IR::U16& value, IR::AccessType acctype);
void WriteMemory32(const IR::U64& vaddr, const IR::U32& value); void WriteMemory32(const IR::U64& vaddr, const IR::U32& value, IR::AccessType acctype);
void WriteMemory64(const IR::U64& vaddr, const IR::U64& value); void WriteMemory64(const IR::U64& vaddr, const IR::U64& value, IR::AccessType acctype);
void WriteMemory128(const IR::U64& vaddr, const IR::U128& value); void WriteMemory128(const IR::U64& vaddr, const IR::U128& value, IR::AccessType acctype);
IR::U32 ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value); IR::U32 ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value, IR::AccessType acctype);
IR::U32 ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value); IR::U32 ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value, IR::AccessType acctype);
IR::U32 ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value); IR::U32 ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value, IR::AccessType acctype);
IR::U32 ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value); IR::U32 ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value, IR::AccessType acctype);
IR::U32 ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value); IR::U32 ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value, IR::AccessType acctype);
IR::U32 GetW(Reg source_reg); IR::U32 GetW(Reg source_reg);
IR::U64 GetX(Reg source_reg); IR::U64 GetX(Reg source_reg);

View file

@ -217,74 +217,74 @@ void TranslatorVisitor::Vpart_scalar(size_t bitsize, Vec vec, size_t part, IR::U
} }
} }
IR::UAnyU128 TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, IR::AccType /*acc_type*/) { IR::UAnyU128 TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, IR::AccessType acctype) {
switch (bytesize) { switch (bytesize) {
case 1: case 1:
return ir.ReadMemory8(address); return ir.ReadMemory8(address, acctype);
case 2: case 2:
return ir.ReadMemory16(address); return ir.ReadMemory16(address, acctype);
case 4: case 4:
return ir.ReadMemory32(address); return ir.ReadMemory32(address, acctype);
case 8: case 8:
return ir.ReadMemory64(address); return ir.ReadMemory64(address, acctype);
case 16: case 16:
return ir.ReadMemory128(address); return ir.ReadMemory128(address, acctype);
default: default:
ASSERT_FALSE("Invalid bytesize parameter {}", bytesize); ASSERT_FALSE("Invalid bytesize parameter {}", bytesize);
} }
} }
void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, IR::AccType /*acc_type*/, IR::UAnyU128 value) { void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, IR::AccessType acctype, IR::UAnyU128 value) {
switch (bytesize) { switch (bytesize) {
case 1: case 1:
ir.WriteMemory8(address, value); ir.WriteMemory8(address, value, acctype);
return; return;
case 2: case 2:
ir.WriteMemory16(address, value); ir.WriteMemory16(address, value, acctype);
return; return;
case 4: case 4:
ir.WriteMemory32(address, value); ir.WriteMemory32(address, value, acctype);
return; return;
case 8: case 8:
ir.WriteMemory64(address, value); ir.WriteMemory64(address, value, acctype);
return; return;
case 16: case 16:
ir.WriteMemory128(address, value); ir.WriteMemory128(address, value, acctype);
return; return;
default: default:
ASSERT_FALSE("Invalid bytesize parameter {}", bytesize); ASSERT_FALSE("Invalid bytesize parameter {}", bytesize);
} }
} }
IR::UAnyU128 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, IR::AccType /*acctype*/) { IR::UAnyU128 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, IR::AccessType acctype) {
switch (bytesize) { switch (bytesize) {
case 1: case 1:
return ir.ExclusiveReadMemory8(address); return ir.ExclusiveReadMemory8(address, acctype);
case 2: case 2:
return ir.ExclusiveReadMemory16(address); return ir.ExclusiveReadMemory16(address, acctype);
case 4: case 4:
return ir.ExclusiveReadMemory32(address); return ir.ExclusiveReadMemory32(address, acctype);
case 8: case 8:
return ir.ExclusiveReadMemory64(address); return ir.ExclusiveReadMemory64(address, acctype);
case 16: case 16:
return ir.ExclusiveReadMemory128(address); return ir.ExclusiveReadMemory128(address, acctype);
default: default:
ASSERT_FALSE("Invalid bytesize parameter {}", bytesize); ASSERT_FALSE("Invalid bytesize parameter {}", bytesize);
} }
} }
IR::U32 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, IR::AccType /*acctype*/, IR::UAnyU128 value) { IR::U32 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, IR::AccessType acctype, IR::UAnyU128 value) {
switch (bytesize) { switch (bytesize) {
case 1: case 1:
return ir.ExclusiveWriteMemory8(address, value); return ir.ExclusiveWriteMemory8(address, value, acctype);
case 2: case 2:
return ir.ExclusiveWriteMemory16(address, value); return ir.ExclusiveWriteMemory16(address, value, acctype);
case 4: case 4:
return ir.ExclusiveWriteMemory32(address, value); return ir.ExclusiveWriteMemory32(address, value, acctype);
case 8: case 8:
return ir.ExclusiveWriteMemory64(address, value); return ir.ExclusiveWriteMemory64(address, value, acctype);
case 16: case 16:
return ir.ExclusiveWriteMemory128(address, value); return ir.ExclusiveWriteMemory128(address, value, acctype);
default: default:
ASSERT_FALSE("Invalid bytesize parameter {}", bytesize); ASSERT_FALSE("Invalid bytesize parameter {}", bytesize);
} }

View file

@ -55,10 +55,10 @@ struct TranslatorVisitor final {
IR::UAny Vpart_scalar(size_t bitsize, Vec vec, size_t part); IR::UAny Vpart_scalar(size_t bitsize, Vec vec, size_t part);
void Vpart_scalar(size_t bitsize, Vec vec, size_t part, IR::UAny value); void Vpart_scalar(size_t bitsize, Vec vec, size_t part, IR::UAny value);
IR::UAnyU128 Mem(IR::U64 address, size_t size, IR::AccType acctype); IR::UAnyU128 Mem(IR::U64 address, size_t size, IR::AccessType acctype);
void Mem(IR::U64 address, size_t size, IR::AccType acctype, IR::UAnyU128 value); void Mem(IR::U64 address, size_t size, IR::AccessType acctype, IR::UAnyU128 value);
IR::UAnyU128 ExclusiveMem(IR::U64 address, size_t size, IR::AccType acctype); IR::UAnyU128 ExclusiveMem(IR::U64 address, size_t size, IR::AccessType acctype);
IR::U32 ExclusiveMem(IR::U64 address, size_t size, IR::AccType acctype, IR::UAnyU128 value); IR::U32 ExclusiveMem(IR::U64 address, size_t size, IR::AccessType acctype, IR::UAnyU128 value);
IR::U32U64 SignExtend(IR::UAny value, size_t to_size); IR::U32U64 SignExtend(IR::UAny value, size_t to_size);
IR::U32U64 ZeroExtend(IR::UAny value, size_t to_size); IR::U32U64 ZeroExtend(IR::UAny value, size_t to_size);

View file

@ -12,7 +12,7 @@ namespace Dynarmic::A64 {
static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& v, bool pair, size_t size, bool L, bool o0, std::optional<Reg> Rs, std::optional<Reg> Rt2, Reg Rn, Reg Rt) { static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& v, bool pair, size_t size, bool L, bool o0, std::optional<Reg> Rs, std::optional<Reg> Rt2, Reg Rn, Reg Rt) {
// Shared Decode // Shared Decode
const auto acctype = o0 ? IR::AccType::ORDERED : IR::AccType::ATOMIC; const auto acctype = o0 ? IR::AccessType::ORDERED : IR::AccessType::ATOMIC;
const auto memop = L ? IR::MemOp::LOAD : IR::MemOp::STORE; const auto memop = L ? IR::MemOp::LOAD : IR::MemOp::STORE;
const size_t elsize = 8 << size; const size_t elsize = 8 << size;
const size_t regsize = elsize == 64 ? 64 : 32; const size_t regsize = elsize == 64 ? 64 : 32;
@ -142,7 +142,7 @@ bool TranslatorVisitor::LDAXP(Imm<1> sz, Reg Rt2, Reg Rn, Reg Rt) {
static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& v, size_t size, bool L, bool o0, Reg Rn, Reg Rt) { static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& v, size_t size, bool L, bool o0, Reg Rn, Reg Rt) {
// Shared Decode // Shared Decode
const auto acctype = !o0 ? IR::AccType::LIMITEDORDERED : IR::AccType::ORDERED; const auto acctype = !o0 ? IR::AccessType::LIMITEDORDERED : IR::AccessType::ORDERED;
const auto memop = L ? IR::MemOp::LOAD : IR::MemOp::STORE; const auto memop = L ? IR::MemOp::LOAD : IR::MemOp::STORE;
const size_t elsize = 8 << size; const size_t elsize = 8 << size;
const size_t regsize = elsize == 64 ? 64 : 32; const size_t regsize = elsize == 64 ? 64 : 32;

View file

@ -12,7 +12,7 @@ bool TranslatorVisitor::LDR_lit_gen(bool opc_0, Imm<19> imm19, Reg Rt) {
const s64 offset = concatenate(imm19, Imm<2>{0}).SignExtend<s64>(); const s64 offset = concatenate(imm19, Imm<2>{0}).SignExtend<s64>();
const u64 address = ir.PC() + offset; const u64 address = ir.PC() + offset;
const auto data = Mem(ir.Imm64(address), size, IR::AccType::NORMAL); const auto data = Mem(ir.Imm64(address), size, IR::AccessType::NORMAL);
X(8 * size, Rt, data); X(8 * size, Rt, data);
return true; return true;
@ -26,7 +26,7 @@ bool TranslatorVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) {
const u64 size = 4 << opc.ZeroExtend(); const u64 size = 4 << opc.ZeroExtend();
const u64 offset = imm19.SignExtend<u64>() << 2; const u64 offset = imm19.SignExtend<u64>() << 2;
const IR::U64 address = ir.Imm64(ir.PC() + offset); const IR::U64 address = ir.Imm64(ir.PC() + offset);
const IR::UAnyU128 data = Mem(address, size, IR::AccType::VEC); const IR::UAnyU128 data = Mem(address, size, IR::AccessType::VEC);
if (size == 16) { if (size == 16) {
V(128, Vt, data); V(128, Vt, data);
@ -39,7 +39,7 @@ bool TranslatorVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) {
bool TranslatorVisitor::LDRSW_lit(Imm<19> imm19, Reg Rt) { bool TranslatorVisitor::LDRSW_lit(Imm<19> imm19, Reg Rt) {
const s64 offset = concatenate(imm19, Imm<2>{0}).SignExtend<s64>(); const s64 offset = concatenate(imm19, Imm<2>{0}).SignExtend<s64>();
const u64 address = ir.PC() + offset; const u64 address = ir.PC() + offset;
const auto data = Mem(ir.Imm64(address), 4, IR::AccType::NORMAL); const auto data = Mem(ir.Imm64(address), 4, IR::AccessType::NORMAL);
X(64, Rt, ir.SignExtendWordToLong(data)); X(64, Rt, ir.SignExtendWordToLong(data));
return true; return true;

View file

@ -67,11 +67,11 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
for (size_t r = 0; r < rpt; r++) { for (size_t r = 0; r < rpt; r++) {
const Vec tt = static_cast<Vec>((VecNumber(Vt) + r) % 32); const Vec tt = static_cast<Vec>((VecNumber(Vt) + r) % 32);
if (memop == IR::MemOp::LOAD) { if (memop == IR::MemOp::LOAD) {
const IR::UAnyU128 vec = v.Mem(v.ir.Add(address, offs), ebytes * elements, IR::AccType::VEC); const IR::UAnyU128 vec = v.Mem(v.ir.Add(address, offs), ebytes * elements, IR::AccessType::VEC);
v.V_scalar(datasize, tt, vec); v.V_scalar(datasize, tt, vec);
} else { } else {
const IR::UAnyU128 vec = v.V_scalar(datasize, tt); const IR::UAnyU128 vec = v.V_scalar(datasize, tt);
v.Mem(v.ir.Add(address, offs), ebytes * elements, IR::AccType::VEC, vec); v.Mem(v.ir.Add(address, offs), ebytes * elements, IR::AccessType::VEC, vec);
} }
offs = v.ir.Add(offs, v.ir.Imm64(ebytes * elements)); offs = v.ir.Add(offs, v.ir.Imm64(ebytes * elements));
} }
@ -80,12 +80,12 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
for (size_t s = 0; s < selem; s++) { for (size_t s = 0; s < selem; s++) {
const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32); const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32);
if (memop == IR::MemOp::LOAD) { if (memop == IR::MemOp::LOAD) {
const IR::UAny elem = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC); const IR::UAny elem = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccessType::VEC);
const IR::U128 vec = v.ir.VectorSetElement(esize, v.V(datasize, tt), e, elem); const IR::U128 vec = v.ir.VectorSetElement(esize, v.V(datasize, tt), e, elem);
v.V(datasize, tt, vec); v.V(datasize, tt, vec);
} else { } else {
const IR::UAny elem = v.ir.VectorGetElement(esize, v.V(datasize, tt), e); const IR::UAny elem = v.ir.VectorGetElement(esize, v.V(datasize, tt), e);
v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC, elem); v.Mem(v.ir.Add(address, offs), ebytes, IR::AccessType::VEC, elem);
} }
offs = v.ir.Add(offs, v.ir.Imm64(ebytes)); offs = v.ir.Add(offs, v.ir.Imm64(ebytes));
} }

View file

@ -43,11 +43,11 @@ static bool LoadStoreRegisterImmediate(TranslatorVisitor& v, bool wback, bool po
switch (memop) { switch (memop) {
case IR::MemOp::STORE: { case IR::MemOp::STORE: {
const auto data = v.X(datasize, Rt); const auto data = v.X(datasize, Rt);
v.Mem(address, datasize / 8, IR::AccType::NORMAL, data); v.Mem(address, datasize / 8, IR::AccessType::NORMAL, data);
break; break;
} }
case IR::MemOp::LOAD: { case IR::MemOp::LOAD: {
const auto data = v.Mem(address, datasize / 8, IR::AccType::NORMAL); const auto data = v.Mem(address, datasize / 8, IR::AccessType::NORMAL);
if (signed_) { if (signed_) {
v.X(regsize, Rt, v.SignExtend(data, regsize)); v.X(regsize, Rt, v.SignExtend(data, regsize));
} else { } else {
@ -115,7 +115,7 @@ bool TranslatorVisitor::PRFM_unscaled_imm([[maybe_unused]] Imm<9> imm9, [[maybe_
} }
static bool LoadStoreSIMD(TranslatorVisitor& v, bool wback, bool postindex, size_t scale, u64 offset, IR::MemOp memop, Reg Rn, Vec Vt) { static bool LoadStoreSIMD(TranslatorVisitor& v, bool wback, bool postindex, size_t scale, u64 offset, IR::MemOp memop, Reg Rn, Vec Vt) {
const auto acctype = IR::AccType::VEC; const auto acctype = IR::AccessType::VEC;
const size_t datasize = 8 << scale; const size_t datasize = 8 << scale;
IR::U64 address; IR::U64 address;

View file

@ -46,13 +46,13 @@ bool TranslatorVisitor::STP_LDP_gen(Imm<2> opc, bool not_postindex, bool wback,
case IR::MemOp::STORE: { case IR::MemOp::STORE: {
const IR::U32U64 data1 = X(datasize, Rt); const IR::U32U64 data1 = X(datasize, Rt);
const IR::U32U64 data2 = X(datasize, Rt2); const IR::U32U64 data2 = X(datasize, Rt2);
Mem(address, dbytes, IR::AccType::NORMAL, data1); Mem(address, dbytes, IR::AccessType::NORMAL, data1);
Mem(ir.Add(address, ir.Imm64(dbytes)), dbytes, IR::AccType::NORMAL, data2); Mem(ir.Add(address, ir.Imm64(dbytes)), dbytes, IR::AccessType::NORMAL, data2);
break; break;
} }
case IR::MemOp::LOAD: { case IR::MemOp::LOAD: {
const IR::U32U64 data1 = Mem(address, dbytes, IR::AccType::NORMAL); const IR::U32U64 data1 = Mem(address, dbytes, IR::AccessType::NORMAL);
const IR::U32U64 data2 = Mem(ir.Add(address, ir.Imm64(dbytes)), dbytes, IR::AccType::NORMAL); const IR::U32U64 data2 = Mem(ir.Add(address, ir.Imm64(dbytes)), dbytes, IR::AccessType::NORMAL);
if (signed_) { if (signed_) {
X(64, Rt, SignExtend(data1, 64)); X(64, Rt, SignExtend(data1, 64));
X(64, Rt2, SignExtend(data2, 64)); X(64, Rt2, SignExtend(data2, 64));
@ -117,13 +117,13 @@ bool TranslatorVisitor::STP_LDP_fpsimd(Imm<2> opc, bool not_postindex, bool wbac
data1 = ir.VectorGetElement(datasize, data1, 0); data1 = ir.VectorGetElement(datasize, data1, 0);
data2 = ir.VectorGetElement(datasize, data2, 0); data2 = ir.VectorGetElement(datasize, data2, 0);
} }
Mem(address, dbytes, IR::AccType::VEC, data1); Mem(address, dbytes, IR::AccessType::VEC, data1);
Mem(ir.Add(address, ir.Imm64(dbytes)), dbytes, IR::AccType::VEC, data2); Mem(ir.Add(address, ir.Imm64(dbytes)), dbytes, IR::AccessType::VEC, data2);
break; break;
} }
case IR::MemOp::LOAD: { case IR::MemOp::LOAD: {
IR::UAnyU128 data1 = Mem(address, dbytes, IR::AccType::VEC); IR::UAnyU128 data1 = Mem(address, dbytes, IR::AccessType::VEC);
IR::UAnyU128 data2 = Mem(ir.Add(address, ir.Imm64(dbytes)), dbytes, IR::AccType::VEC); IR::UAnyU128 data2 = Mem(ir.Add(address, ir.Imm64(dbytes)), dbytes, IR::AccessType::VEC);
if (datasize != 128) { if (datasize != 128) {
data1 = ir.ZeroExtendToQuad(data1); data1 = ir.ZeroExtendToQuad(data1);
data2 = ir.ZeroExtendToQuad(data2); data2 = ir.ZeroExtendToQuad(data2);

View file

@ -10,7 +10,7 @@ namespace Dynarmic::A64 {
static bool RegSharedDecodeAndOperation(TranslatorVisitor& v, size_t scale, u8 shift, Imm<2> size, Imm<1> opc_1, Imm<1> opc_0, Reg Rm, Imm<3> option, Reg Rn, Reg Rt) { static bool RegSharedDecodeAndOperation(TranslatorVisitor& v, size_t scale, u8 shift, Imm<2> size, Imm<1> opc_1, Imm<1> opc_0, Reg Rm, Imm<3> option, Reg Rn, Reg Rt) {
// Shared Decode // Shared Decode
const auto acctype = IR::AccType::NORMAL; const auto acctype = IR::AccessType::NORMAL;
IR::MemOp memop; IR::MemOp memop;
size_t regsize = 64; size_t regsize = 64;
bool signed_ = false; bool signed_ = false;
@ -96,7 +96,7 @@ bool TranslatorVisitor::LDRx_reg(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> optio
static bool VecSharedDecodeAndOperation(TranslatorVisitor& v, size_t scale, u8 shift, Imm<1> opc_0, Reg Rm, Imm<3> option, Reg Rn, Vec Vt) { static bool VecSharedDecodeAndOperation(TranslatorVisitor& v, size_t scale, u8 shift, Imm<1> opc_0, Reg Rm, Imm<3> option, Reg Rn, Vec Vt) {
// Shared Decode // Shared Decode
const auto acctype = IR::AccType::VEC; const auto acctype = IR::AccessType::VEC;
const auto memop = opc_0 == 1 ? IR::MemOp::LOAD : IR::MemOp::STORE; const auto memop = opc_0 == 1 ? IR::MemOp::LOAD : IR::MemOp::STORE;
const size_t datasize = 8 << scale; const size_t datasize = 8 << scale;

View file

@ -9,7 +9,7 @@ namespace Dynarmic::A64 {
static bool StoreRegister(TranslatorVisitor& v, const size_t datasize, const Imm<9> imm9, const Reg Rn, const Reg Rt) { static bool StoreRegister(TranslatorVisitor& v, const size_t datasize, const Imm<9> imm9, const Reg Rn, const Reg Rt) {
const u64 offset = imm9.SignExtend<u64>(); const u64 offset = imm9.SignExtend<u64>();
const auto acctype = IR::AccType::UNPRIV; const auto acctype = IR::AccessType::UNPRIV;
IR::U64 address; IR::U64 address;
if (Rn == Reg::SP) { if (Rn == Reg::SP) {
@ -27,7 +27,7 @@ static bool StoreRegister(TranslatorVisitor& v, const size_t datasize, const Imm
static bool LoadRegister(TranslatorVisitor& v, const size_t datasize, const Imm<9> imm9, const Reg Rn, const Reg Rt) { static bool LoadRegister(TranslatorVisitor& v, const size_t datasize, const Imm<9> imm9, const Reg Rn, const Reg Rt) {
const u64 offset = imm9.SignExtend<u64>(); const u64 offset = imm9.SignExtend<u64>();
const auto acctype = IR::AccType::UNPRIV; const auto acctype = IR::AccessType::UNPRIV;
IR::U64 address; IR::U64 address;
if (Rn == Reg::SP) { if (Rn == Reg::SP) {
@ -47,7 +47,7 @@ static bool LoadRegister(TranslatorVisitor& v, const size_t datasize, const Imm<
static bool LoadRegisterSigned(TranslatorVisitor& v, const size_t datasize, const Imm<2> opc, const Imm<9> imm9, const Reg Rn, const Reg Rt) { static bool LoadRegisterSigned(TranslatorVisitor& v, const size_t datasize, const Imm<2> opc, const Imm<9> imm9, const Reg Rn, const Reg Rt) {
const u64 offset = imm9.SignExtend<u64>(); const u64 offset = imm9.SignExtend<u64>();
const auto acctype = IR::AccType::UNPRIV; const auto acctype = IR::AccessType::UNPRIV;
IR::MemOp memop; IR::MemOp memop;
bool is_signed; bool is_signed;
@ -131,7 +131,7 @@ bool TranslatorVisitor::LDTRSH(Imm<2> opc, Imm<9> imm9, Reg Rn, Reg Rt) {
bool TranslatorVisitor::LDTRSW(Imm<9> imm9, Reg Rn, Reg Rt) { bool TranslatorVisitor::LDTRSW(Imm<9> imm9, Reg Rn, Reg Rt) {
const u64 offset = imm9.SignExtend<u64>(); const u64 offset = imm9.SignExtend<u64>();
const auto acctype = IR::AccType::UNPRIV; const auto acctype = IR::AccessType::UNPRIV;
IR::U64 address; IR::U64 address;
if (Rn == Reg::SP) { if (Rn == Reg::SP) {

View file

@ -62,7 +62,7 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
if (replicate) { if (replicate) {
for (size_t s = 0; s < selem; s++) { for (size_t s = 0; s < selem; s++) {
const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32); const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32);
const IR::UAnyU128 element = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC); const IR::UAnyU128 element = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccessType::VEC);
const IR::U128 broadcasted_element = v.ir.VectorBroadcast(esize, element); const IR::U128 broadcasted_element = v.ir.VectorBroadcast(esize, element);
v.V(datasize, tt, broadcasted_element); v.V(datasize, tt, broadcasted_element);
@ -75,12 +75,12 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
const IR::U128 rval = v.V(128, tt); const IR::U128 rval = v.V(128, tt);
if (memop == IR::MemOp::LOAD) { if (memop == IR::MemOp::LOAD) {
const IR::UAny elem = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC); const IR::UAny elem = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccessType::VEC);
const IR::U128 vec = v.ir.VectorSetElement(esize, rval, index, elem); const IR::U128 vec = v.ir.VectorSetElement(esize, rval, index, elem);
v.V(128, tt, vec); v.V(128, tt, vec);
} else { } else {
const IR::UAny elem = v.ir.VectorGetElement(esize, rval, index); const IR::UAny elem = v.ir.VectorGetElement(esize, rval, index);
v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC, elem); v.Mem(v.ir.Add(address, offs), ebytes, IR::AccessType::VEC, elem);
} }
offs = v.ir.Add(offs, v.ir.Imm64(ebytes)); offs = v.ir.Add(offs, v.ir.Imm64(ebytes));
} }

View file

@ -0,0 +1,28 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
namespace Dynarmic::IR {
enum class AccessType {
NORMAL,
VEC,
STREAM,
VECSTREAM,
ATOMIC,
ORDERED,
ORDEREDRW,
LIMITEDORDERED,
UNPRIV,
IFETCH,
PTW,
DC,
IC,
DCZVA,
AT,
};
} // namespace Dynarmic::IR

View file

@ -6,6 +6,7 @@
#pragma once #pragma once
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/ir/access_type.h"
#include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h" #include "dynarmic/ir/location_descriptor.h"
#include "dynarmic/ir/terminal.h" #include "dynarmic/ir/terminal.h"
@ -56,24 +57,6 @@ struct UpperAndLower {
U128 lower; U128 lower;
}; };
enum class AccType {
NORMAL,
VEC,
STREAM,
VECSTREAM,
ATOMIC,
ORDERED,
ORDEREDRW,
LIMITEDORDERED,
UNPRIV,
IFETCH,
PTW,
DC,
IC,
DCZVA,
AT,
};
enum class MemOp { enum class MemOp {
LOAD, LOAD,
STORE, STORE,

View file

@ -43,6 +43,7 @@ constexpr Type CoprocInfo = Type::CoprocInfo;
constexpr Type NZCV = Type::NZCVFlags; constexpr Type NZCV = Type::NZCVFlags;
constexpr Type Cond = Type::Cond; constexpr Type Cond = Type::Cond;
constexpr Type Table = Type::Table; constexpr Type Table = Type::Table;
constexpr Type AccessType = Type::AccessType;
static const std::array opcode_info{ static const std::array opcode_info{
#define OPCODE(name, type, ...) Meta{#name, type, {__VA_ARGS__}}, #define OPCODE(name, type, ...) Meta{#name, type, {__VA_ARGS__}},

View file

@ -702,26 +702,26 @@ A32OPC(ExclusiveWriteMemory64, U32, U32,
// A64 Memory access // A64 Memory access
A64OPC(ClearExclusive, Void, ) A64OPC(ClearExclusive, Void, )
A64OPC(ReadMemory8, U8, U64 ) A64OPC(ReadMemory8, U8, U64, AccessType )
A64OPC(ReadMemory16, U16, U64 ) A64OPC(ReadMemory16, U16, U64, AccessType )
A64OPC(ReadMemory32, U32, U64 ) A64OPC(ReadMemory32, U32, U64, AccessType )
A64OPC(ReadMemory64, U64, U64 ) A64OPC(ReadMemory64, U64, U64, AccessType )
A64OPC(ReadMemory128, U128, U64 ) A64OPC(ReadMemory128, U128, U64, AccessType )
A64OPC(ExclusiveReadMemory8, U8, U64 ) A64OPC(ExclusiveReadMemory8, U8, U64, AccessType )
A64OPC(ExclusiveReadMemory16, U16, U64 ) A64OPC(ExclusiveReadMemory16, U16, U64, AccessType )
A64OPC(ExclusiveReadMemory32, U32, U64 ) A64OPC(ExclusiveReadMemory32, U32, U64, AccessType )
A64OPC(ExclusiveReadMemory64, U64, U64 ) A64OPC(ExclusiveReadMemory64, U64, U64, AccessType )
A64OPC(ExclusiveReadMemory128, U128, U64 ) A64OPC(ExclusiveReadMemory128, U128, U64, AccessType )
A64OPC(WriteMemory8, Void, U64, U8 ) A64OPC(WriteMemory8, Void, U64, U8, AccessType )
A64OPC(WriteMemory16, Void, U64, U16 ) A64OPC(WriteMemory16, Void, U64, U16, AccessType )
A64OPC(WriteMemory32, Void, U64, U32 ) A64OPC(WriteMemory32, Void, U64, U32, AccessType )
A64OPC(WriteMemory64, Void, U64, U64 ) A64OPC(WriteMemory64, Void, U64, U64, AccessType )
A64OPC(WriteMemory128, Void, U64, U128 ) A64OPC(WriteMemory128, Void, U64, U128, AccessType )
A64OPC(ExclusiveWriteMemory8, U32, U64, U8 ) A64OPC(ExclusiveWriteMemory8, U32, U64, U8, AccessType )
A64OPC(ExclusiveWriteMemory16, U32, U64, U16 ) A64OPC(ExclusiveWriteMemory16, U32, U64, U16, AccessType )
A64OPC(ExclusiveWriteMemory32, U32, U64, U32 ) A64OPC(ExclusiveWriteMemory32, U32, U64, U32, AccessType )
A64OPC(ExclusiveWriteMemory64, U32, U64, U64 ) A64OPC(ExclusiveWriteMemory64, U32, U64, U64, AccessType )
A64OPC(ExclusiveWriteMemory128, U32, U64, U128 ) A64OPC(ExclusiveWriteMemory128, U32, U64, U128, AccessType )
// Coprocessor // Coprocessor
A32OPC(CoprocInternalOperation, Void, CoprocInfo ) A32OPC(CoprocInternalOperation, Void, CoprocInfo )

View file

@ -32,19 +32,19 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0));
while (bytes >= 16) { while (bytes >= 16) {
ir.WriteMemory128(addr, zero_u128); ir.WriteMemory128(addr, zero_u128, IR::AccessType::DCZVA);
addr = ir.Add(addr, ir.Imm64(16)); addr = ir.Add(addr, ir.Imm64(16));
bytes -= 16; bytes -= 16;
} }
while (bytes >= 8) { while (bytes >= 8) {
ir.WriteMemory64(addr, ir.Imm64(0)); ir.WriteMemory64(addr, ir.Imm64(0), IR::AccessType::DCZVA);
addr = ir.Add(addr, ir.Imm64(8)); addr = ir.Add(addr, ir.Imm64(8));
bytes -= 8; bytes -= 8;
} }
while (bytes >= 4) { while (bytes >= 4) {
ir.WriteMemory32(addr, ir.Imm32(0)); ir.WriteMemory32(addr, ir.Imm32(0), IR::AccessType::DCZVA);
addr = ir.Add(addr, ir.Imm64(4)); addr = ir.Add(addr, ir.Imm64(4));
bytes -= 4; bytes -= 4;
} }

View file

@ -32,6 +32,7 @@ enum class Type {
NZCVFlags = 1 << 12, NZCVFlags = 1 << 12,
Cond = 1 << 13, Cond = 1 << 13,
Table = 1 << 14, Table = 1 << 14,
AccessType = 1 << 15,
}; };
constexpr Type operator|(Type a, Type b) { constexpr Type operator|(Type a, Type b) {

View file

@ -73,6 +73,11 @@ Value::Value(Cond value)
inner.imm_cond = value; inner.imm_cond = value;
} }
Value::Value(AccessType value)
: type(Type::AccessType) {
inner.imm_acctype = value;
}
bool Value::IsIdentity() const { bool Value::IsIdentity() const {
if (type == Type::Opaque) if (type == Type::Opaque)
return inner.inst->GetOpcode() == Opcode::Identity; return inner.inst->GetOpcode() == Opcode::Identity;
@ -178,6 +183,13 @@ Cond Value::GetCond() const {
return inner.imm_cond; return inner.imm_cond;
} }
AccessType Value::GetAccType() const {
if (IsIdentity())
return inner.inst->GetArg(0).GetAccType();
ASSERT(type == Type::AccessType);
return inner.imm_acctype;
}
s64 Value::GetImmediateAsS64() const { s64 Value::GetImmediateAsS64() const {
ASSERT(IsImmediate()); ASSERT(IsImmediate());

View file

@ -25,6 +25,7 @@ enum class Vec;
namespace Dynarmic::IR { namespace Dynarmic::IR {
class Inst; class Inst;
enum class AccessType;
enum class Cond; enum class Cond;
/** /**
@ -49,6 +50,7 @@ public:
explicit Value(u64 value); explicit Value(u64 value);
explicit Value(CoprocessorInfo value); explicit Value(CoprocessorInfo value);
explicit Value(Cond value); explicit Value(Cond value);
explicit Value(AccessType value);
bool IsIdentity() const; bool IsIdentity() const;
bool IsEmpty() const; bool IsEmpty() const;
@ -68,6 +70,7 @@ public:
u64 GetU64() const; u64 GetU64() const;
CoprocessorInfo GetCoprocInfo() const; CoprocessorInfo GetCoprocInfo() const;
Cond GetCond() const; Cond GetCond() const;
AccessType GetAccType() const;
/** /**
* Retrieves the immediate of a Value instance as a signed 64-bit value. * Retrieves the immediate of a Value instance as a signed 64-bit value.
@ -140,6 +143,7 @@ private:
u64 imm_u64; u64 imm_u64;
CoprocessorInfo imm_coproc; CoprocessorInfo imm_coproc;
Cond imm_cond; Cond imm_cond;
AccessType imm_acctype;
} inner; } inner;
}; };
static_assert(sizeof(Value) <= 2 * sizeof(u64), "IR::Value should be kept small in size"); static_assert(sizeof(Value) <= 2 * sizeof(u64), "IR::Value should be kept small in size");

View file

@ -83,8 +83,8 @@ public:
using iterator_category = std::bidirectional_iterator_tag; using iterator_category = std::bidirectional_iterator_tag;
using value_type = typename IntrusiveRedBlackTreeImpl::value_type; using value_type = typename IntrusiveRedBlackTreeImpl::value_type;
using difference_type = typename IntrusiveRedBlackTreeImpl::difference_type; using difference_type = typename IntrusiveRedBlackTreeImpl::difference_type;
using pointer = typename std::conditional<Const, IntrusiveRedBlackTreeImpl::const_pointer, using pointer = std::conditional_t<Const, IntrusiveRedBlackTreeImpl::const_pointer,
IntrusiveRedBlackTreeImpl::pointer>::type; IntrusiveRedBlackTreeImpl::pointer>;
using reference = using reference =
typename std::conditional<Const, IntrusiveRedBlackTreeImpl::const_reference, typename std::conditional<Const, IntrusiveRedBlackTreeImpl::const_reference,
IntrusiveRedBlackTreeImpl::reference>::type; IntrusiveRedBlackTreeImpl::reference>::type;
@ -263,7 +263,7 @@ namespace impl {
template <typename T, typename Default> template <typename T, typename Default>
using RedBlackKeyType = using RedBlackKeyType =
typename std::remove_pointer<decltype(impl::GetRedBlackKeyType<T, Default>())>::type; typename std::remove_pointer_t<decltype(impl::GetRedBlackKeyType<T, Default>())>;
template <class T, class Traits, class Comparator> template <class T, class Traits, class Comparator>
class IntrusiveRedBlackTree { class IntrusiveRedBlackTree {
@ -299,7 +299,7 @@ public:
friend class IntrusiveRedBlackTree<T, Traits, Comparator>; friend class IntrusiveRedBlackTree<T, Traits, Comparator>;
using ImplIterator = using ImplIterator =
typename std::conditional<Const, ImplType::const_iterator, ImplType::iterator>::type; std::conditional_t<Const, ImplType::const_iterator, ImplType::iterator>;
using iterator_category = std::bidirectional_iterator_tag; using iterator_category = std::bidirectional_iterator_tag;
using value_type = typename IntrusiveRedBlackTree::value_type; using value_type = typename IntrusiveRedBlackTree::value_type;
@ -315,7 +315,7 @@ public:
private: private:
constexpr explicit Iterator(ImplIterator it) : m_impl(it) {} constexpr explicit Iterator(ImplIterator it) : m_impl(it) {}
constexpr explicit Iterator(typename ImplIterator::pointer p) : m_impl(p) {} constexpr explicit Iterator(ImplIterator::pointer p) : m_impl(p) {}
constexpr ImplIterator GetImplIterator() const { constexpr ImplIterator GetImplIterator() const {
return m_impl; return m_impl;

View file

@ -55,12 +55,6 @@ enum class RBColor {
#pragma pack(push, 4) #pragma pack(push, 4)
template <typename T> template <typename T>
class RBEntry { class RBEntry {
private:
T* m_rbe_left{};
T* m_rbe_right{};
T* m_rbe_parent{};
RBColor m_rbe_color{RBColor::RB_BLACK};
public: public:
constexpr RBEntry() = default; constexpr RBEntry() = default;
@ -110,6 +104,12 @@ public:
constexpr void SetColor(RBColor c) { constexpr void SetColor(RBColor c) {
m_rbe_color = c; m_rbe_color = c;
} }
private:
T* m_rbe_left{};
T* m_rbe_right{};
T* m_rbe_parent{};
RBColor m_rbe_color{RBColor::RB_BLACK};
}; };
#pragma pack(pop) #pragma pack(pop)

View file

@ -107,6 +107,12 @@ VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAd
return start + size; return start + size;
} }
size_t CalculateSlabHeapGapSize() {
constexpr size_t KernelSlabHeapGapSize = 2_MiB - 296_KiB;
static_assert(KernelSlabHeapGapSize <= KernelSlabHeapGapsSizeMax);
return KernelSlabHeapGapSize;
}
} // namespace } // namespace
KSlabResourceCounts KSlabResourceCounts::CreateDefault() { KSlabResourceCounts KSlabResourceCounts::CreateDefault() {
@ -137,12 +143,6 @@ void InitializeSlabResourceCounts(KernelCore& kernel) {
} }
} }
size_t CalculateSlabHeapGapSize() {
constexpr size_t KernelSlabHeapGapSize = 2_MiB - 296_KiB;
static_assert(KernelSlabHeapGapSize <= KernelSlabHeapGapsSizeMax);
return KernelSlabHeapGapSize;
}
size_t CalculateTotalSlabHeapSize(const KernelCore& kernel) { size_t CalculateTotalSlabHeapSize(const KernelCore& kernel) {
size_t size = 0; size_t size = 0;

View file

@ -16,13 +16,8 @@
namespace Kernel { namespace Kernel {
class KPageBuffer final : public KSlabAllocated<KPageBuffer> { class KPageBuffer final : public KSlabAllocated<KPageBuffer> {
private:
alignas(PageSize) std::array<u8, PageSize> m_buffer;
public: public:
KPageBuffer() { KPageBuffer() = default;
std::memset(&m_buffer, 0, m_buffer.size());
}
PAddr GetPhysicalAddress(Core::System& system) const { PAddr GetPhysicalAddress(Core::System& system) const {
return system.DeviceMemory().GetPhysicalAddr(this); return system.DeviceMemory().GetPhysicalAddr(this);
@ -32,6 +27,9 @@ public:
ASSERT(Common::IsAligned(phys_addr, PageSize)); ASSERT(Common::IsAligned(phys_addr, PageSize));
return reinterpret_cast<KPageBuffer*>(system.DeviceMemory().GetPointer(phys_addr)); return reinterpret_cast<KPageBuffer*>(system.DeviceMemory().GetPointer(phys_addr));
} }
private:
alignas(PageSize) std::array<u8, PageSize> m_buffer{};
}; };
static_assert(sizeof(KPageBuffer) == PageSize); static_assert(sizeof(KPageBuffer) == PageSize);

View file

@ -98,7 +98,7 @@ ResultCode KServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& co
UNREACHABLE(); UNREACHABLE();
return ResultSuccess; // Ignore error if asserts are off return ResultSuccess; // Ignore error if asserts are off
} }
if (auto strong_ptr = manager->DomainHandler(object_id - 1).lock(); strong_ptr) { if (auto strong_ptr = manager->DomainHandler(object_id - 1).lock()) {
return strong_ptr->HandleSyncRequest(*this, context); return strong_ptr->HandleSyncRequest(*this, context);
} else { } else {
UNREACHABLE(); UNREACHABLE();

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <algorithm>
#include <array> #include <array>
#include "common/alignment.h" #include "common/alignment.h"
@ -28,9 +29,7 @@ public:
public: public:
explicit KThreadLocalPage(VAddr addr = {}) : m_virt_addr(addr) { explicit KThreadLocalPage(VAddr addr = {}) : m_virt_addr(addr) {
for (size_t i = 0; i < m_is_region_free.size(); i++) { m_is_region_free.fill(true);
m_is_region_free[i] = true;
}
} }
constexpr VAddr GetAddress() const { constexpr VAddr GetAddress() const {
@ -44,21 +43,13 @@ public:
void Release(VAddr addr); void Release(VAddr addr);
bool IsAllUsed() const { bool IsAllUsed() const {
for (size_t i = 0; i < RegionsPerPage; i++) { return std::ranges::all_of(m_is_region_free.begin(), m_is_region_free.end(),
if (m_is_region_free[i]) { [](bool is_free) { return !is_free; });
return false;
}
}
return true;
} }
bool IsAllFree() const { bool IsAllFree() const {
for (size_t i = 0; i < RegionsPerPage; i++) { return std::ranges::all_of(m_is_region_free.begin(), m_is_region_free.end(),
if (!m_is_region_free[i]) { [](bool is_free) { return is_free; });
return false;
}
}
return true;
} }
bool IsAnyUsed() const { bool IsAnyUsed() const {

View file

@ -284,16 +284,16 @@ struct KernelCore::Impl {
// Gets the dummy KThread for the caller, allocating a new one if this is the first time // Gets the dummy KThread for the caller, allocating a new one if this is the first time
KThread* GetHostDummyThread() { KThread* GetHostDummyThread() {
auto init_thread_ = [this](KThread* thread) { auto initialize = [this](KThread* thread) {
ASSERT(KThread::InitializeDummyThread(thread).IsSuccess()); ASSERT(KThread::InitializeDummyThread(thread).IsSuccess());
thread->SetName(fmt::format("DummyThread:{}", GetHostThreadId())); thread->SetName(fmt::format("DummyThread:{}", GetHostThreadId()));
return thread; return thread;
}; };
thread_local auto thread = KThread(system.Kernel()); thread_local auto raw_thread = KThread(system.Kernel());
thread_local auto init_thread = init_thread_(&thread); thread_local auto thread = initialize(&raw_thread);
return &thread; return thread;
} }
/// Registers a CPU core thread by allocating a host thread ID for it /// Registers a CPU core thread by allocating a host thread ID for it

View file

@ -1189,6 +1189,8 @@ void Config::SaveCpuValues() {
WriteBasicSetting(Settings::values.cpuopt_misc_ir); WriteBasicSetting(Settings::values.cpuopt_misc_ir);
WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks); WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
WriteBasicSetting(Settings::values.cpuopt_fastmem); WriteBasicSetting(Settings::values.cpuopt_fastmem);
WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives);
} }
qt_config->endGroup(); qt_config->endGroup();