early-access version 1546

parent c7fcf99851
commit 0a011dc368

7 changed files with 209 additions and 17 deletions
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============

-This is the source code for early-access 1543.
+This is the source code for early-access 1546.

 ## Legal Notice

externals/dynarmic/src/CMakeLists.txt (vendored, 1 change)
@@ -163,6 +163,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
         frontend/A32/translate/impl/thumb32_data_processing_shifted_register.cpp
         frontend/A32/translate/impl/thumb32_load_byte.cpp
         frontend/A32/translate/impl/thumb32_load_halfword.cpp
+        frontend/A32/translate/impl/thumb32_load_store_dual.cpp
         frontend/A32/translate/impl/thumb32_load_store_multiple.cpp
         frontend/A32/translate/impl/thumb32_load_word.cpp
         frontend/A32/translate/impl/thumb32_long_multiply.cpp
@@ -13,6 +13,7 @@
 #include "backend/x64/block_of_code.h"
 #include "backend/x64/perf_map.h"
 #include "common/assert.h"
+#include "common/bit_util.h"

 #ifdef _WIN32
 #include <windows.h>
@@ -43,8 +44,8 @@ const std::array<Xbyak::Reg64, 6> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PA

 namespace {

-constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
-constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
+constexpr size_t TOTAL_CODE_SIZE = 256 * 1024 * 1024;
+constexpr size_t FAR_CODE_OFFSET = 200 * 1024 * 1024;
 constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;

 class CustomXbyakAllocator : public Xbyak::Allocator {
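The two enlarged constants above size one shared emitted-code buffer. As a rough illustration only (the split into a near and a far region is an assumption about how FAR_CODE_OFFSET is used, and the names below are made up), the new values leave roughly this much room in each part:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
    // Values taken from the diff above.
    constexpr std::size_t TOTAL_CODE_SIZE = 256 * 1024 * 1024;
    constexpr std::size_t FAR_CODE_OFFSET = 200 * 1024 * 1024;

    // Assumed layout: [0, FAR_CODE_OFFSET) for regular code,
    // [FAR_CODE_OFFSET, TOTAL_CODE_SIZE) for far (cold-path) code.
    constexpr std::size_t near_bytes = FAR_CODE_OFFSET;
    constexpr std::size_t far_bytes  = TOTAL_CODE_SIZE - FAR_CODE_OFFSET;

    std::printf("near: %zu MiB, far: %zu MiB\n",
                near_bytes / (1024 * 1024), far_bytes / (1024 * 1024));
    return 0;
}
```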
@@ -364,7 +365,21 @@ bool BlockOfCode::HasBMI2() const {
 }

 bool BlockOfCode::HasFastBMI2() const {
-    return DoesCpuSupport(Xbyak::util::Cpu::tBMI2) && !DoesCpuSupport(Xbyak::util::Cpu::tAMD);
+    if (DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
+        // BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
+        // Check for Zen 3 or newer by its family (0x19).
+        // See also: https://en.wikichip.org/wiki/amd/cpuid
+        if (DoesCpuSupport(Xbyak::util::Cpu::tAMD)) {
+            std::array<u32, 4> data{};
+            cpu_info.getCpuid(1, data.data());
+            const u32 family_base = Common::Bits< 8, 11>(data[0]);
+            const u32 family_extended = Common::Bits<20, 27>(data[0]);
+            const u32 family = family_base + family_extended;
+            return family >= 0x19;
+        }
+        return true;
+    }
+    return false;
 }

 bool BlockOfCode::HasFMA() const {
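The rewritten HasFastBMI2 keeps pdep/pext enabled on non-AMD CPUs, and on AMD only for family 0x19 (Zen 3) and newer, where the family is the base family (EAX bits 8 to 11 of CPUID leaf 1) plus the extended family (bits 20 to 27). A self-contained sketch of just that arithmetic; the Bits helper and the sample EAX value are illustrative stand-ins, not part of the commit:

```cpp
#include <cstdint>
#include <cstdio>

// Stand-in for Common::Bits<begin, end>: extract bits [lo, hi] of x, inclusive.
constexpr std::uint32_t Bits(std::uint32_t x, unsigned lo, unsigned hi) {
    return (x >> lo) & ((1u << (hi - lo + 1u)) - 1u);
}

int main() {
    // Illustrative EAX value from CPUID leaf 1 for a Zen 3 class part (not from the commit).
    const std::uint32_t eax = 0x00A20F10;

    const std::uint32_t family_base     = Bits(eax, 8, 11);               // 0xF
    const std::uint32_t family_extended = Bits(eax, 20, 27);              // 0x0A
    const std::uint32_t family          = family_base + family_extended;  // 0x19

    std::printf("family=0x%X -> fast BMI2 on AMD: %s\n",
                static_cast<unsigned>(family), family >= 0x19 ? "yes" : "no");
    return 0;
}
```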
@@ -337,8 +337,12 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     ctx.reg_alloc.DefineValue(inst, result);
 }

+enum CheckInputNaN {
+    Yes, No,
+};
+
 template<size_t fsize, template<typename> class Indexer, typename Function>
-void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, typename NaNHandler<fsize, Indexer, 3>::function_type nan_handler = NaNHandler<fsize, Indexer, 3>::GetDefault()) {
+void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, CheckInputNaN check_input_nan = CheckInputNaN::No, typename NaNHandler<fsize, Indexer, 3>::function_type nan_handler = NaNHandler<fsize, Indexer, 3>::GetDefault()) {
     static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -371,15 +375,31 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
     const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();

-    code.movaps(nan_mask, xmm_b);
     code.movaps(result, xmm_a);
-    FCODE(cmpunordp)(nan_mask, xmm_a);

+    if (check_input_nan == CheckInputNaN::Yes) {
+        if (code.HasAVX()) {
+            FCODE(vcmpunordp)(nan_mask, xmm_a, xmm_b);
+        } else {
+            code.movaps(nan_mask, xmm_b);
+            FCODE(cmpunordp)(nan_mask, xmm_a);
+        }
+    }
+
     if constexpr (std::is_member_function_pointer_v<Function>) {
         (code.*fn)(result, xmm_b);
     } else {
         fn(result, xmm_b);
     }
-    FCODE(cmpunordp)(nan_mask, result);

+    if (check_input_nan == CheckInputNaN::Yes) {
+        FCODE(cmpunordp)(nan_mask, result);
+    } else if (code.HasAVX()) {
+        FCODE(vcmpunordp)(nan_mask, result, result);
+    } else {
+        code.movaps(nan_mask, result);
+        FCODE(cmpunordp)(nan_mask, nan_mask);
+    }
+
     HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
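Per lane, nan_mask selects which elements get routed to the NaN handler. A scalar model of the two variants, assuming the usual cmpunordps semantics (a comparison is unordered when either operand is NaN); the function below is an illustration, not dynarmic code:

```cpp
#include <cmath>
#include <cstdio>

// Scalar model of one SIMD lane: should this lane go through the NaN handler?
bool NeedsNaNHandling(float a, float b, float result, bool check_input_nan) {
    if (check_input_nan) {
        // CheckInputNaN::Yes: the mask accumulates input NaNs and result NaNs.
        return std::isnan(a) || std::isnan(b) || std::isnan(result);
    }
    // CheckInputNaN::No: only the result is inspected.
    return std::isnan(result);
}

int main() {
    const float qnan = std::nanf("");
    // With a NaN input but a non-NaN result, only the Yes variant flags the lane.
    std::printf("%d %d\n",
                NeedsNaNHandling(qnan, 1.0f, 1.0f, true),
                NeedsNaNHandling(qnan, 1.0f, 1.0f, false));
    return 0;
}
```

This is why EmitFPVectorMinMax in the next hunk passes CheckInputNaN::Yes: minps/maxps can return a non-NaN result even when one of the inputs was NaN, so checking only the result would miss it.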
@@ -951,7 +971,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
             code.andnps(mask, eq);
             code.orps(result, mask);
         }
-    });
+    }, CheckInputNaN::Yes);
 }

 void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {
@@ -13,17 +13,17 @@ INST(thumb32_LDMDB, "LDMDB/LDMEA", "1110100100W1nnnniiiiii
 // Load/Store Dual, Load/Store Exclusive, Table Branch
 //INST(thumb32_STREX, "STREX", "111010000100--------------------")
 //INST(thumb32_LDREX, "LDREX", "111010000101--------------------")
-//INST(thumb32_STRD_imm_1, "STRD (imm)", "11101000-110--------------------")
-//INST(thumb32_STRD_imm_2, "STRD (imm)", "11101001-1-0--------------------")
-//INST(thumb32_LDRD_imm_1, "LDRD (lit)", "11101000-1111111----------------")
-//INST(thumb32_LDRD_imm_2, "LDRD (lit)", "11101001-1-11111----------------")
-//INST(thumb32_LDRD_imm_1, "LDRD (imm)", "11101000-111--------------------")
-//INST(thumb32_LDRD_imm_2, "LDRD (imm)", "11101001-1-1--------------------")
+INST(thumb32_STRD_imm_1, "STRD (imm)", "11101000U110nnnnttttssssiiiiiiii")
+INST(thumb32_STRD_imm_2, "STRD (imm)", "11101001U1W0nnnnttttssssiiiiiiii")
+INST(thumb32_LDRD_lit_1, "LDRD (lit)", "11101000U1111111ttttssssiiiiiiii")
+INST(thumb32_LDRD_lit_2, "LDRD (lit)", "11101001U1W11111ttttssssiiiiiiii")
+INST(thumb32_LDRD_imm_1, "LDRD (imm)", "11101000U111nnnnttttssssiiiiiiii")
+INST(thumb32_LDRD_imm_2, "LDRD (imm)", "11101001U1W1nnnnttttssssiiiiiiii")
 //INST(thumb32_STREXB, "STREXB", "111010001100------------0100----")
 //INST(thumb32_STREXH, "STREXH", "111010001100------------0101----")
 //INST(thumb32_STREXD, "STREXD", "111010001100------------0111----")
-//INST(thumb32_TBB, "TBB", "111010001101------------0000----")
-//INST(thumb32_TBH, "TBH", "111010001101------------0001----")
+INST(thumb32_TBB, "TBB", "111010001101nnnn111100000000mmmm")
+INST(thumb32_TBH, "TBH", "111010001101nnnn111100000001mmmm")
 //INST(thumb32_LDREXB, "LDREXB", "111010001101------------0100----")
 //INST(thumb32_LDREXH, "LDREXH", "111010001101------------0101----")
 //INST(thumb32_LDREXD, "LDREXD", "111010001101------------0111----")
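In these decode patterns each character is one bit, most significant first: 0/1 must match literally, '-' is don't-care, and letter runs become the fields handed to the visitor (for the LDRD/STRD forms above: U at bit 23, W at bit 21 where present, nnnn = Rn in bits 19-16, tttt = Rt in 15-12, ssss = Rt2 in 11-8, iiiiiiii = imm8 in 7-0). A hand-rolled sketch of that extraction for the second LDRD (imm) pattern; it is independent of dynarmic's decoder generator and the example encoding is made up:

```cpp
#include <cstdio>

// Field layout implied by "11101001U1W1nnnnttttssssiiiiiiii" (LDRD (imm), second form).
struct LdrdImmFields {
    bool U, W;
    unsigned n, t, t2, imm8;
};

LdrdImmFields DecodeLdrdImm2(unsigned insn) {
    LdrdImmFields f{};
    f.U    = ((insn >> 23) & 1) != 0;
    f.W    = ((insn >> 21) & 1) != 0;
    f.n    = (insn >> 16) & 0xF;
    f.t    = (insn >> 12) & 0xF;
    f.t2   = (insn >> 8) & 0xF;
    f.imm8 = insn & 0xFF;
    return f;
}

int main() {
    // Made-up encoding: LDRD r2, r3, [r1, #16]  (U=1, W=0, n=1, t=2, t2=3, imm8=4).
    const unsigned insn = 0b1110'1001'1101'0001'0010'0011'0000'0100;
    const LdrdImmFields f = DecodeLdrdImm2(insn);
    std::printf("U=%d W=%d n=%u t=%u t2=%u byte offset=%u\n",
                f.U, f.W, f.n, f.t, f.t2, f.imm8 * 4u);
    return 0;
}
```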
externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_store_dual.cpp (vendored, new executable file, 146 additions)
@@ -0,0 +1,146 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2021 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "common/bit_util.h"
+#include "frontend/A32/translate/impl/translate_thumb.h"
+
+namespace Dynarmic::A32 {
+static bool ITBlockCheck(const A32::IREmitter& ir) {
+    return ir.current_location.IT().IsInITBlock() && !ir.current_location.IT().IsLastInITBlock();
+}
+
+static bool TableBranch(ThumbTranslatorVisitor& v, Reg n, Reg m, bool half) {
+    if (m == Reg::PC) {
+        return v.UnpredictableInstruction();
+    }
+    if (ITBlockCheck(v.ir)) {
+        return v.UnpredictableInstruction();
+    }
+
+    const auto reg_m = v.ir.GetRegister(m);
+    const auto reg_n = v.ir.GetRegister(n);
+
+    IR::U32 halfwords;
+    if (half) {
+        const auto data = v.ir.ReadMemory16(v.ir.Add(reg_n, v.ir.LogicalShiftLeft(reg_m, v.ir.Imm8(1))));
+        halfwords = v.ir.ZeroExtendToWord(data);
+    } else {
+        halfwords = v.ir.ZeroExtendToWord(v.ir.ReadMemory8(v.ir.Add(reg_n, reg_m)));
+    }
+
+    const auto current_pc = v.ir.Imm32(v.ir.PC());
+    const auto branch_value = v.ir.Add(current_pc, v.ir.Add(halfwords, halfwords));
+
+    v.ir.UpdateUpperLocationDescriptor();
+    v.ir.BranchWritePC(branch_value);
+    v.ir.SetTerm(IR::Term::FastDispatchHint{});
+    return false;
+}
+
+static bool LoadDualImmediate(ThumbTranslatorVisitor& v, bool P, bool U, bool W,
+                              Reg n, Reg t, Reg t2, Imm<8> imm8) {
+    if (W && (n == t || n == t2)) {
+        return v.UnpredictableInstruction();
+    }
+    if (t == Reg::PC || t2 == Reg::PC || t == t2) {
+        return v.UnpredictableInstruction();
+    }
+
+    const u32 imm = imm8.ZeroExtend() << 2;
+    const IR::U32 reg_n = v.ir.GetRegister(n);
+    const IR::U32 offset_address = U ? v.ir.Add(reg_n, v.ir.Imm32(imm))
+                                     : v.ir.Sub(reg_n, v.ir.Imm32(imm));
+    const IR::U32 address_1 = P ? offset_address
+                                : reg_n;
+    const IR::U32 address_2 = v.ir.Add(address_1, v.ir.Imm32(4));
+
+    v.ir.SetRegister(t, v.ir.ReadMemory32(address_1));
+    v.ir.SetRegister(t2, v.ir.ReadMemory32(address_2));
+
+    if (W) {
+        v.ir.SetRegister(n, offset_address);
+    }
+    return true;
+}
+
+static bool LoadDualLiteral(ThumbTranslatorVisitor& v, bool U, bool W, Reg t, Reg t2, Imm<8> imm8) {
+    if (t == Reg::PC || t2 == Reg::PC || t == t2) {
+        return v.UnpredictableInstruction();
+    }
+    if (W) {
+        return v.UnpredictableInstruction();
+    }
+
+    const auto imm = imm8.ZeroExtend() << 2;
+    const auto address_1 = U ? v.ir.Add(v.ir.Imm32(v.ir.AlignPC(4)), v.ir.Imm32(imm))
+                             : v.ir.Sub(v.ir.Imm32(v.ir.AlignPC(4)), v.ir.Imm32(imm));
+    const auto address_2 = v.ir.Add(address_1, v.ir.Imm32(4));
+
+    v.ir.SetRegister(t, v.ir.ReadMemory32(address_1));
+    v.ir.SetRegister(t2, v.ir.ReadMemory32(address_2));
+    return true;
+}
+
+static bool StoreDual(ThumbTranslatorVisitor& v, bool P, bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8) {
+    if (W && (n == t || n == t2)) {
+        return v.UnpredictableInstruction();
+    }
+    if (n == Reg::PC || t == Reg::PC || t2 == Reg::PC) {
+        return v.UnpredictableInstruction();
+    }
+
+    const u32 imm = imm8.ZeroExtend() << 2;
+    const IR::U32 reg_n = v.ir.GetRegister(n);
+    const IR::U32 reg_t = v.ir.GetRegister(t);
+    const IR::U32 reg_t2 = v.ir.GetRegister(t2);
+
+    const IR::U32 offset_address = U ? v.ir.Add(reg_n, v.ir.Imm32(imm))
+                                     : v.ir.Sub(reg_n, v.ir.Imm32(imm));
+    const IR::U32 address_1 = P ? offset_address
+                                : reg_n;
+    const IR::U32 address_2 = v.ir.Add(address_1, v.ir.Imm32(4));
+
+    v.ir.WriteMemory32(address_1, reg_t);
+    v.ir.WriteMemory32(address_2, reg_t2);
+
+    if (W) {
+        v.ir.SetRegister(n, offset_address);
+    }
+    return true;
+}
+
+bool ThumbTranslatorVisitor::thumb32_LDRD_imm_1(bool U, Reg n, Reg t, Reg t2, Imm<8> imm8) {
+    return LoadDualImmediate(*this, false, U, true, n, t, t2, imm8);
+}
+
+bool ThumbTranslatorVisitor::thumb32_LDRD_imm_2(bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8) {
+    return LoadDualImmediate(*this, true, U, W, n, t, t2, imm8);
+}
+
+bool ThumbTranslatorVisitor::thumb32_LDRD_lit_1(bool U, Reg t, Reg t2, Imm<8> imm8) {
+    return LoadDualLiteral(*this, U, true, t, t2, imm8);
+}
+
+bool ThumbTranslatorVisitor::thumb32_LDRD_lit_2(bool U, bool W, Reg t, Reg t2, Imm<8> imm8) {
+    return LoadDualLiteral(*this, U, W, t, t2, imm8);
+}
+
+bool ThumbTranslatorVisitor::thumb32_STRD_imm_1(bool U, Reg n, Reg t, Reg t2, Imm<8> imm8) {
+    return StoreDual(*this, false, U, true, n, t, t2, imm8);
+}
+
+bool ThumbTranslatorVisitor::thumb32_STRD_imm_2(bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8) {
+    return StoreDual(*this, true, U, W, n, t, t2, imm8);
+}
+
+bool ThumbTranslatorVisitor::thumb32_TBB(Reg n, Reg m) {
+    return TableBranch(*this, n, m, false);
+}
+
+bool ThumbTranslatorVisitor::thumb32_TBH(Reg n, Reg m) {
+    return TableBranch(*this, n, m, true);
+}
+
+} // namespace Dynarmic::A32
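LoadDualImmediate and StoreDual above boil the P/U/W bits down to simple address arithmetic: offset_address is Rn plus or minus the scaled immediate, the first access uses offset_address when P is set and Rn otherwise, the second access is 4 bytes later, and W writes offset_address back to Rn. A plain-integer sketch of that computation; the register values are invented for illustration:

```cpp
#include <cstdio>

struct DualAddresses {
    unsigned address_1, address_2, new_rn; // new_rn is Rn after optional writeback
};

// Mirrors the address computation in LoadDualImmediate/StoreDual above.
DualAddresses ComputeDualAddresses(unsigned reg_n, unsigned imm8, bool P, bool U, bool W) {
    const unsigned imm = imm8 << 2;                                // imm8.ZeroExtend() << 2
    const unsigned offset_address = U ? reg_n + imm : reg_n - imm;
    const unsigned address_1 = P ? offset_address : reg_n;
    const unsigned address_2 = address_1 + 4;
    return DualAddresses{address_1, address_2, W ? offset_address : reg_n};
}

int main() {
    // e.g. LDRD r2, r3, [r1, #16]: Rn = 0x1000, imm8 = 4, P = 1, U = 1, W = 0.
    const DualAddresses a = ComputeDualAddresses(0x1000, 4, true, true, false);
    std::printf("address_1=0x%X address_2=0x%X Rn after=0x%X\n",
                a.address_1, a.address_2, a.new_rn); // 0x1010 0x1014 0x1000
    return 0;
}
```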
@@ -179,6 +179,16 @@ struct ThumbTranslatorVisitor final {
     bool thumb32_STMIA(bool W, Reg n, Imm<15> reg_list);
     bool thumb32_STMDB(bool W, Reg n, Imm<15> reg_list);

+    // thumb32 load/store dual, load/store exclusive, table branch instructions
+    bool thumb32_LDRD_imm_1(bool U, Reg n, Reg t, Reg t2, Imm<8> imm8);
+    bool thumb32_LDRD_imm_2(bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8);
+    bool thumb32_LDRD_lit_1(bool U, Reg t, Reg t2, Imm<8> imm8);
+    bool thumb32_LDRD_lit_2(bool U, bool W, Reg t, Reg t2, Imm<8> imm8);
+    bool thumb32_STRD_imm_1(bool U, Reg n, Reg t, Reg t2, Imm<8> imm8);
+    bool thumb32_STRD_imm_2(bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8);
+    bool thumb32_TBB(Reg n, Reg m);
+    bool thumb32_TBH(Reg n, Reg m);
+
     // thumb32 data processing (shifted register) instructions
     bool thumb32_TST_reg(Reg n, Imm<3> imm3, Imm<2> imm2, ShiftType type, Reg m);
     bool thumb32_AND_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2> imm2, ShiftType type, Reg m);