early-access version 2836

This commit is contained in:
pineappleEA 2022-07-15 13:11:09 +02:00
parent 0e7aef7e36
commit 2a9883730d
78 changed files with 1122 additions and 982 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 2835.
This is the source code for early-access 2836.
## Legal Notice

View file

@ -1,6 +1,8 @@
# Built files
build/
build-*/
cmake-build-*/
.idea/
docs/Doxygen/
# Generated files
src/dynarmic/backend/x64/mig/

View file

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.8)
project(dynarmic LANGUAGES C CXX ASM VERSION 6.1.1)
project(dynarmic LANGUAGES C CXX ASM VERSION 6.2.0)
# Determine if we're built as a subproject (using add_subdirectory)
# or if this is the master project.

View file

@ -267,21 +267,30 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```
### mp
### mcl & oaknut
```
Copyright (C) 2017 MerryMage
MIT License
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted.
Copyright (c) 2022 merryhime <https://mary.rs>
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
### robin-map

View file

@ -3,36 +3,38 @@ on: [push, pull_request]
jobs:
test_on_ubuntu:
runs-on: ubuntu-latest
name: Build on ${{ matrix.distro }} ${{ matrix.arch }}
strategy:
matrix:
include:
- arch: aarch64
distro: ubuntu_latest
name: g++-10
steps:
- uses: actions/checkout@v3
- uses: uraimo/run-on-arch-action@v2
name: Build and Test
id: build
with:
arch: ${{ matrix.arch }}
distro: ${{ matrix.distro }}
shell: /bin/bash
- name: Checkout oaknut repo
uses: actions/checkout@v3
install: |
apt-get update -q -y
apt-get install -q -y make cmake g++ git
- name: Install dependencies
run: >
sudo apt-get install -q -y
gcc-10-aarch64-linux-gnu
g++-10-aarch64-linux-gnu
ninja-build
qemu-user
pushd /tmp
git clone https://github.com/catchorg/Catch2.git
cd Catch2
cmake -Bbuild -H. -DBUILD_TESTING=OFF
cmake --build build/ --target install
popd
- name: Checkout Catch2 v3 repo
run: git clone https://github.com/catchorg/Catch2.git externals/catch
run: |
cmake -Bbuild -H.
cmake --build build
./build/oaknut-tests
- name: Configure CMake
env:
CC: aarch64-linux-gnu-gcc-10
CXX: aarch64-linux-gnu-g++-10
run: >
cmake
-B ${{github.workspace}}/build
-H.
-GNinja
-DDYNARMIC_USE_BUNDLED_CATCH=ON
- name: Build
working-directory: ${{github.workspace}}/build
run: ninja
- name: Test
working-directory: ${{github.workspace}}/build
run: qemu-aarch64 -L /usr/aarch64-linux-gnu ./oaknut-tests -d yes

View file

@ -39,7 +39,11 @@ target_compile_features(oaknut INTERFACE cxx_std_20)
# Tests
if (MASTER_PROJECT)
find_package(Catch2 3 REQUIRED)
if (DYNARMIC_USE_BUNDLED_CATCH)
add_subdirectory(externals/catch)
else()
find_package(Catch2 3 REQUIRED)
endif()
add_executable(oaknut-tests
tests/basic.cpp
@ -49,9 +53,4 @@ if (MASTER_PROJECT)
target_include_directories(oaknut-tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/tests)
target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut)
target_compile_options(oaknut-tests PRIVATE -Wall -Wextra -Wcast-qual -pedantic -pedantic-errors -Wfatal-errors -Wno-missing-braces)
include(CTest)
include(Catch)
catch_discover_tests(oaknut-tests)
enable_testing()
endif()

View file

@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>

View file

@ -114,9 +114,6 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
// Start emitting.
code.align();
const u8* const entrypoint = code.getCurr();
code.SwitchToFarCode();
const u8* const entrypoint_far = code.getCurr();
code.SwitchToNearCode();
EmitCondPrelude(ctx);
@ -155,6 +152,11 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3();
for (auto& deferred_emit : ctx.deferred_emits) {
deferred_emit();
}
code.int3();
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
const A32::LocationDescriptor descriptor{block.Location()};
@ -163,7 +165,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
const auto range = boost::icl::discrete_interval<u32>::closed(descriptor.PC(), end_location.PC() - 1);
block_ranges.AddRange(range, descriptor);
return RegisterBlock(descriptor, entrypoint, entrypoint_far, size);
return RegisterBlock(descriptor, entrypoint, size);
}
void A32EmitX64::ClearCache() {
@ -1168,16 +1170,9 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
}
}
Xbyak::Label dest;
code.jmp(dest, Xbyak::CodeGenerator::T_NEAR);
code.SwitchToFarCode();
code.align(16);
code.L(dest);
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
PushRSBHelper(rax, rbx, terminal.next);
code.ForceReturnFromRunCode();
code.SwitchToNearCode();
}
void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {

View file

@ -110,6 +110,7 @@ protected:
FakeCall FastmemCallback(u64 rip);
// Memory access helpers
void EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end = nullptr);
template<std::size_t bitsize, auto callback>
void EmitMemoryRead(A32EmitContext& ctx, IR::Inst* inst);
template<std::size_t bitsize, auto callback>

View file

@ -235,4 +235,25 @@ void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* in
}
}
/// Emits a check for a pending memory abort.
///
/// Emits nothing unless conf.check_halt_on_memory_access is set. Otherwise the
/// generated code tests the MemoryAbort bit of A32JitState::halt_reason:
///  - bit clear: jumps to *end when a label was supplied, else continues past the check;
///  - bit set:   writes back the location taken from the instruction's first argument
///               (upper location descriptor and Reg[15]) and forces a return from run code.
///
/// @param ctx  Emit context; its current location is passed to EmitSetUpperLocationDescriptor.
/// @param inst Instruction whose argument 0 holds the location descriptor value.
/// @param end  Optional label to jump to when no abort is pending.
void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end) {
    if (conf.check_halt_on_memory_access) {
        const A32::LocationDescriptor abort_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

        // Local fall-through label; it is the jump target only when the caller gave no `end`.
        Xbyak::Label skip;
        Xbyak::Label& no_abort = end ? *end : skip;

        code.test(dword[r15 + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
        code.jz(no_abort, code.T_NEAR);

        // Abort path: store the location, then leave the run loop.
        EmitSetUpperLocationDescriptor(abort_location, ctx.Location());
        code.mov(dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * 15], abort_location.PC());
        code.ForceReturnFromRunCode();

        code.L(skip);
    }
}
} // namespace Dynarmic::Backend::X64

View file

@ -60,7 +60,7 @@ static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code)
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig conf)
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, GenRCP(conf))
, emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code))
, conf(std::move(conf))
@ -171,10 +171,9 @@ private:
PerformCacheInvalidation();
}
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks,
{conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions, conf.check_halt_on_memory_access});
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
Optimization::PolyfillPass(ir_block, polyfill_options);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
Optimization::A32GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
}

View file

@ -85,9 +85,6 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
// Start emitting.
code.align();
const u8* const entrypoint = code.getCurr();
code.SwitchToFarCode();
const u8* const entrypoint_far = code.getCurr();
code.SwitchToNearCode();
ASSERT(block.GetCondition() == IR::Cond::AL);
@ -126,6 +123,11 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3();
for (auto& deferred_emit : ctx.deferred_emits) {
deferred_emit();
}
code.int3();
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
const A64::LocationDescriptor descriptor{block.Location()};
@ -134,7 +136,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
const auto range = boost::icl::discrete_interval<u64>::closed(descriptor.PC(), end_location.PC() - 1);
block_ranges.AddRange(range, descriptor);
return RegisterBlock(descriptor, entrypoint, entrypoint_far, size);
return RegisterBlock(descriptor, entrypoint, size);
}
void A64EmitX64::ClearCache() {

View file

@ -108,6 +108,7 @@ protected:
FakeCall FastmemCallback(u64 rip);
// Memory access helpers
void EmitCheckMemoryAbort(A64EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end = nullptr);
template<std::size_t bitsize, auto callback>
void EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst);
template<std::size_t bitsize, auto callback>

View file

@ -407,4 +407,25 @@ void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* i
}
}
/// Emits a check for a pending memory abort.
///
/// Emits nothing unless conf.check_halt_on_memory_access is set. Otherwise the
/// generated code tests the MemoryAbort bit of A64JitState::halt_reason:
///  - bit clear: jumps to *end when a label was supplied, else continues past the check;
///  - bit set:   stores the PC of the location taken from the instruction's first
///               argument into A64JitState::pc (via rax) and forces a return from run code.
///
/// @param inst Instruction whose argument 0 holds the location descriptor value.
/// @param end  Optional label to jump to when no abort is pending.
void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::Label* end) {
    if (conf.check_halt_on_memory_access) {
        const A64::LocationDescriptor abort_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

        // Local fall-through label; it is the jump target only when the caller gave no `end`.
        Xbyak::Label skip;
        Xbyak::Label& no_abort = end ? *end : skip;

        code.test(dword[r15 + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
        code.jz(no_abort, code.T_NEAR);

        // Abort path: the PC is staged in rax before being stored to the jit state.
        code.mov(rax, abort_location.PC());
        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
        code.ForceReturnFromRunCode();

        code.L(skip);
    }
}
} // namespace Dynarmic::Backend::X64

View file

@ -58,7 +58,7 @@ struct Jit::Impl final {
public:
Impl(Jit* jit, UserConfig conf)
: conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, GenRCP(conf))
, emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code)) {
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
@ -269,10 +269,10 @@ private:
// JIT Compile
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct, conf.hook_hint_instructions, conf.check_halt_on_memory_access});
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
Optimization::PolyfillPass(ir_block, polyfill_options);
Optimization::A64CallbackConfigPass(ir_block, conf);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
Optimization::A64GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
}

View file

@ -185,22 +185,19 @@ HostFeature GetHostFeatures() {
} // anonymous namespace
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, size_t far_code_offset, std::function<void(BlockOfCode&)> rcp)
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, std::function<void(BlockOfCode&)> rcp)
: Xbyak::CodeGenerator(total_code_size, nullptr, &s_allocator)
, cb(std::move(cb))
, jsi(jsi)
, far_code_offset(far_code_offset)
, constant_pool(*this, CONSTANT_POOL_SIZE)
, host_features(GetHostFeatures()) {
ASSERT(total_code_size > far_code_offset);
EnableWriting();
GenRunCode(rcp);
}
void BlockOfCode::PreludeComplete() {
prelude_complete = true;
near_code_begin = getCurr();
far_code_begin = getCurr() + far_code_offset;
code_begin = getCurr();
ClearCache();
DisableWriting();
}
@ -219,21 +216,15 @@ void BlockOfCode::DisableWriting() {
void BlockOfCode::ClearCache() {
ASSERT(prelude_complete);
in_far_code = false;
near_code_ptr = near_code_begin;
far_code_ptr = far_code_begin;
SetCodePtr(near_code_begin);
SetCodePtr(code_begin);
}
size_t BlockOfCode::SpaceRemaining() const {
ASSERT(prelude_complete);
const u8* current_near_ptr = in_far_code ? reinterpret_cast<const u8*>(near_code_ptr) : getCurr<const u8*>();
const u8* current_far_ptr = in_far_code ? getCurr<const u8*>() : reinterpret_cast<const u8*>(far_code_ptr);
if (current_near_ptr >= far_code_begin)
const u8* current_ptr = getCurr<const u8*>();
if (current_ptr >= &top_[maxSize_])
return 0;
if (current_far_ptr >= &top_[maxSize_])
return 0;
return std::min(reinterpret_cast<const u8*>(far_code_begin) - current_near_ptr, &top_[maxSize_] - current_far_ptr);
return &top_[maxSize_] - current_ptr;
}
HaltReason BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
@ -406,26 +397,8 @@ Xbyak::Address BlockOfCode::XmmConst(const Xbyak::AddressFrame& frame, u64 lower
return constant_pool.GetConstant(frame, lower, upper);
}
void BlockOfCode::SwitchToFarCode() {
ASSERT(prelude_complete);
ASSERT(!in_far_code);
in_far_code = true;
near_code_ptr = getCurr();
SetCodePtr(far_code_ptr);
ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
}
void BlockOfCode::SwitchToNearCode() {
ASSERT(prelude_complete);
ASSERT(in_far_code);
in_far_code = false;
far_code_ptr = getCurr();
SetCodePtr(near_code_ptr);
}
CodePtr BlockOfCode::GetCodeBegin() const {
return near_code_begin;
return code_begin;
}
size_t BlockOfCode::GetTotalCodeSize() const {

View file

@ -36,7 +36,7 @@ struct RunCodeCallbacks {
class BlockOfCode final : public Xbyak::CodeGenerator {
public:
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, size_t far_code_offset, std::function<void(BlockOfCode&)> rcp);
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, std::function<void(BlockOfCode&)> rcp);
BlockOfCode(const BlockOfCode&) = delete;
/// Call when external emitters have finished emitting their preludes.
@ -49,7 +49,7 @@ public:
/// Clears this block of code and resets code pointer to beginning.
void ClearCache();
/// Calculates how much space is remaining to use. This is the minimum of near code and far code.
/// Calculates how much space is remaining to use.
size_t SpaceRemaining() const;
/// Runs emulated code from code_ptr.
@ -125,11 +125,6 @@ public:
mcl::bit::replicate_element<u64>(esize, value));
}
/// Far code sits far away from the near code. Execution remains primarily in near code.
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
void SwitchToFarCode();
void SwitchToNearCode();
CodePtr GetCodeBegin() const;
size_t GetTotalCodeSize() const;
@ -180,18 +175,12 @@ public:
private:
RunCodeCallbacks cb;
JitStateInfo jsi;
size_t far_code_offset;
bool prelude_complete = false;
CodePtr near_code_begin = nullptr;
CodePtr far_code_begin = nullptr;
CodePtr code_begin = nullptr;
ConstantPool constant_pool;
bool in_far_code = false;
CodePtr near_code_ptr;
CodePtr far_code_ptr;
using RunCodeFuncType = HaltReason (*)(void*, CodePtr);
RunCodeFuncType run_code = nullptr;
RunCodeFuncType step_code = nullptr;

View file

@ -32,6 +32,8 @@ using namespace Xbyak::util;
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
: reg_alloc(reg_alloc), block(block) {}
EmitContext::~EmitContext() = default;
/// Returns the position of `inst` within this context's block, counted as the
/// iterator distance from the block's first instruction.
size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
    const auto first = block.begin();
    const auto target = IR::Block::iterator(inst);
    const auto offset = std::distance(first, target);
    return static_cast<size_t>(offset);
}
@ -274,11 +276,8 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
return pass;
}
EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, CodePtr entrypoint_far, size_t size) {
EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
PerfMapRegister(entrypoint, code.getCurr(), LocationDescriptorToFriendlyName(descriptor));
code.SwitchToFarCode();
PerfMapRegister(entrypoint_far, code.getCurr(), LocationDescriptorToFriendlyName(descriptor) + "_far");
code.SwitchToNearCode();
Patch(descriptor, entrypoint);
BlockDescriptor block_desc{entrypoint, size};

View file

@ -6,6 +6,8 @@
#pragma once
#include <array>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
@ -14,6 +16,7 @@
#include <mcl/bitsizeof.hpp>
#include <tsl/robin_map.h>
#include <tsl/robin_set.h>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "dynarmic/backend/x64/exception_handler.h"
@ -48,6 +51,7 @@ using HalfVectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof
struct EmitContext {
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
virtual ~EmitContext();
size_t GetInstOffset(IR::Inst* inst) const;
void EraseInstruction(IR::Inst* inst);
@ -58,8 +62,16 @@ struct EmitContext {
RegAlloc& reg_alloc;
IR::Block& block;
std::vector<std::function<void()>> deferred_emits;
};
// A label with shared ownership. Emitters copy it into the lambdas they push onto
// EmitContext::deferred_emits, so the label is still alive when that deferred code
// is emitted later.
using SharedLabel = std::shared_ptr<Xbyak::Label>;
// Creates a new SharedLabel holding a default-constructed Xbyak::Label.
inline SharedLabel GenSharedLabel() {
return std::make_shared<Xbyak::Label>();
}
class EmitX64 {
public:
struct BlockDescriptor {
@ -93,7 +105,7 @@ protected:
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
void EmitAddCycles(size_t cycles);
Xbyak::Label EmitCond(IR::Cond cond);
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, CodePtr entrypoint_far, size_t size);
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target);
// Terminal instruction emitters

View file

@ -152,18 +152,18 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) {
}
template<size_t fsize>
Xbyak::Label ProcessNaN(BlockOfCode& code, Xbyak::Xmm a) {
Xbyak::Label nan, end;
SharedLabel ProcessNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a) {
SharedLabel nan = GenSharedLabel(), end = GenSharedLabel();
FCODE(ucomis)(a, a);
code.jp(nan, code.T_NEAR);
code.SwitchToFarCode();
code.L(nan);
code.jp(*nan, code.T_NEAR);
code.orps(a, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000));
ctx.deferred_emits.emplace_back([=, &code] {
code.L(*nan);
code.orps(a, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000));
code.jmp(*end, code.T_NEAR);
});
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
return end;
}
@ -268,12 +268,12 @@ template<size_t fsize, typename Function>
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Label end;
SharedLabel end = GenSharedLabel();
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
end = ProcessNaN<fsize>(code, result);
end = ProcessNaN<fsize>(code, ctx, result);
}
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.*fn)(result, result);
@ -287,7 +287,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
} else {
PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
}
code.L(end);
code.L(*end);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -321,7 +321,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
Xbyak::Label end, nan, op_are_nans;
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
code.movaps(result, op1);
if constexpr (std::is_member_function_pointer_v<Function>) {
@ -330,19 +330,21 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
fn(result, op2);
}
FCODE(ucomis)(result, result);
code.jp(nan, code.T_NEAR);
code.L(end);
code.jp(*nan, code.T_NEAR);
code.L(*end);
code.SwitchToFarCode();
code.L(nan);
FCODE(ucomis)(op1, op2);
code.jp(op_are_nans);
// Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN!
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
code.jmp(end, code.T_NEAR);
code.L(op_are_nans);
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, &code] {
Xbyak::Label op_are_nans;
code.L(*nan);
FCODE(ucomis)(op1, op2);
code.jp(op_are_nans);
// Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN!
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
code.jmp(*end, code.T_NEAR);
code.L(op_are_nans);
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, *end);
});
ctx.reg_alloc.DefineValue(inst, result);
}
@ -428,39 +430,39 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
DenormalsAreZero<fsize>(code, ctx, {result, operand});
Xbyak::Label equal, end, nan;
SharedLabel equal = GenSharedLabel(), end = GenSharedLabel();
FCODE(ucomis)(result, operand);
code.jz(equal, code.T_NEAR);
code.jz(*equal, code.T_NEAR);
if constexpr (is_max) {
FCODE(maxs)(result, operand);
} else {
FCODE(mins)(result, operand);
}
code.L(end);
code.L(*end);
code.SwitchToFarCode();
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
Xbyak::Label nan;
code.L(equal);
code.jp(nan);
if constexpr (is_max) {
code.andps(result, operand);
} else {
code.orps(result, operand);
}
code.jmp(end);
code.L(*equal);
code.jp(nan);
if constexpr (is_max) {
code.andps(result, operand);
} else {
code.orps(result, operand);
}
code.jmp(*end);
code.L(nan);
if (ctx.FPCR().DN()) {
code.movaps(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
code.jmp(end);
} else {
code.movaps(tmp, result);
FCODE(adds)(result, operand);
EmitPostProcessNaNs<fsize>(code, result, tmp, operand, gpr_scratch, end);
}
code.SwitchToNearCode();
code.L(nan);
if (ctx.FPCR().DN()) {
code.movaps(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
code.jmp(*end);
} else {
code.movaps(tmp, result);
FCODE(adds)(result, operand);
EmitPostProcessNaNs<fsize>(code, result, tmp, operand, gpr_scratch, *end);
}
});
ctx.reg_alloc.DefineValue(inst, result);
}
@ -469,7 +471,6 @@ template<size_t fsize, bool is_max>
static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mcl::unsigned_integer_of_size<fsize>;
constexpr FPT default_nan = FP::FPInfo<FPT>::DefaultNaN();
constexpr u8 mantissa_msb_bit = static_cast<u8>(FP::FPInfo<FPT>::explicit_mantissa_width - 1);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -492,7 +493,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
tmp.setBit(fsize);
const auto move_to_tmp = [&](const Xbyak::Xmm& xmm) {
const auto move_to_tmp = [=, &code](const Xbyak::Xmm& xmm) {
if constexpr (fsize == 32) {
code.movd(tmp.cvt32(), xmm);
} else {
@ -500,78 +501,79 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
}
};
Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal;
SharedLabel end = GenSharedLabel(), z = GenSharedLabel();
FCODE(ucomis)(op1, op2);
code.jz(z, code.T_NEAR);
code.L(normal);
code.jz(*z, code.T_NEAR);
if constexpr (is_max) {
FCODE(maxs)(op2, op1);
} else {
FCODE(mins)(op2, op1);
}
code.L(end);
code.L(*end);
code.SwitchToFarCode();
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
Xbyak::Label nan, op2_is_nan, snan, maybe_both_nan;
code.L(z);
code.jp(nan);
if constexpr (is_max) {
code.andps(op2, op1);
} else {
code.orps(op2, op1);
}
code.jmp(end);
constexpr u8 mantissa_msb_bit = static_cast<u8>(FP::FPInfo<FPT>::explicit_mantissa_width - 1);
// NaN requirements:
// op1 op2 result
// SNaN anything op1
// !SNaN SNaN op2
// QNaN !NaN op2
// !NaN QNaN op1
// QNaN QNaN op1
code.L(*z);
code.jp(nan);
if constexpr (is_max) {
code.andps(op2, op1);
} else {
code.orps(op2, op1);
}
code.jmp(*end);
code.L(nan);
FCODE(ucomis)(op1, op1);
code.jnp(op2_is_nan);
// NaN requirements:
// op1 op2 result
// SNaN anything op1
// !SNaN SNaN op2
// QNaN !NaN op2
// !NaN QNaN op1
// QNaN QNaN op1
// op1 is NaN
move_to_tmp(op1);
code.bt(tmp, mantissa_msb_bit);
code.jc(maybe_both_nan);
if (ctx.FPCR().DN()) {
code.L(snan);
code.movaps(op2, code.XmmBConst<fsize>(xword, default_nan));
code.jmp(end);
} else {
code.movaps(op2, op1);
code.L(snan);
code.orps(op2, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::mantissa_msb));
code.jmp(end);
}
code.L(nan);
FCODE(ucomis)(op1, op1);
code.jnp(op2_is_nan);
code.L(maybe_both_nan);
FCODE(ucomis)(op2, op2);
code.jnp(end, code.T_NEAR);
if (ctx.FPCR().DN()) {
code.jmp(snan);
} else {
// op1 is NaN
move_to_tmp(op1);
code.bt(tmp, mantissa_msb_bit);
code.jc(maybe_both_nan);
if (ctx.FPCR().DN()) {
code.L(snan);
code.movaps(op2, code.XmmBConst<fsize>(xword, default_nan));
code.jmp(*end);
} else {
code.movaps(op2, op1);
code.L(snan);
code.orps(op2, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::mantissa_msb));
code.jmp(*end);
}
code.L(maybe_both_nan);
FCODE(ucomis)(op2, op2);
code.jnp(*end, code.T_NEAR);
if (ctx.FPCR().DN()) {
code.jmp(snan);
} else {
move_to_tmp(op2);
code.bt(tmp.cvt64(), mantissa_msb_bit);
code.jnc(snan);
code.movaps(op2, op1);
code.jmp(*end);
}
// op2 is NaN
code.L(op2_is_nan);
move_to_tmp(op2);
code.bt(tmp.cvt64(), mantissa_msb_bit);
code.bt(tmp, mantissa_msb_bit);
code.jnc(snan);
code.movaps(op2, op1);
code.jmp(end);
}
// op2 is NaN
code.L(op2_is_nan);
move_to_tmp(op2);
code.bt(tmp, mantissa_msb_bit);
code.jnc(snan);
code.movaps(op2, op1);
code.jmp(end);
code.SwitchToNearCode();
code.jmp(*end);
});
}
ctx.reg_alloc.DefineValue(inst, op2);
@ -636,7 +638,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
}
if (code.HasHostFeature(HostFeature::FMA)) {
Xbyak::Label end, fallback;
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@ -650,34 +652,34 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.movaps(tmp, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
code.andps(tmp, result);
FCODE(ucomis)(tmp, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal));
code.jz(fallback, code.T_NEAR);
code.L(end);
code.jz(*fallback, code.T_NEAR);
code.L(*end);
code.SwitchToFarCode();
code.L(fallback);
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
code.L(*fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2);
code.movq(code.ABI_PARAM3, operand3);
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2);
code.movq(code.ABI_PARAM3, operand3);
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32
code.sub(rsp, 16 + ABI_SHADOW_SPACE);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
code.CallFunction(&FP::FPMulAdd<FPT>);
code.add(rsp, 16 + ABI_SHADOW_SPACE);
code.sub(rsp, 16 + ABI_SHADOW_SPACE);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
code.CallFunction(&FP::FPMulAdd<FPT>);
code.add(rsp, 16 + ABI_SHADOW_SPACE);
#else
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPMulAdd<FPT>);
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPMulAdd<FPT>);
#endif
code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
return;
@ -735,7 +737,7 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 tmp = do_default_nan ? INVALID_REG : ctx.reg_alloc.ScratchGpr();
Xbyak::Label end, nan, op_are_nans;
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vmuls)(result, op1, op2);
@ -744,30 +746,32 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
FCODE(muls)(result, op2);
}
FCODE(ucomis)(result, result);
code.jp(nan, code.T_NEAR);
code.L(end);
code.jp(*nan, code.T_NEAR);
code.L(*end);
code.SwitchToFarCode();
code.L(nan);
FCODE(ucomis)(op1, op2);
code.jp(op_are_nans);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vxorps(result, op1, op2);
} else {
code.movaps(result, op1);
code.xorps(result, op2);
}
code.andps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::sign_mask));
code.orps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
code.jmp(end, code.T_NEAR);
code.L(op_are_nans);
if (do_default_nan) {
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
code.jmp(end, code.T_NEAR);
} else {
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
}
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, &code] {
Xbyak::Label op_are_nans;
code.L(*nan);
FCODE(ucomis)(op1, op2);
code.jp(op_are_nans);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vxorps(result, op1, op2);
} else {
code.movaps(result, op1);
code.xorps(result, op2);
}
code.andps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::sign_mask));
code.orps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
code.jmp(*end, code.T_NEAR);
code.L(op_are_nans);
if (do_default_nan) {
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
code.jmp(*end, code.T_NEAR);
} else {
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, *end);
}
});
ctx.reg_alloc.DefineValue(inst, result);
}
@ -871,7 +875,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
}
if (code.HasHostFeature(HostFeature::FMA)) {
Xbyak::Label end, fallback;
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@ -880,25 +884,25 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
FCODE(vfnmadd231s)(result, operand1, operand2);
FCODE(ucomis)(result, result);
code.jp(fallback, code.T_NEAR);
code.L(end);
code.jp(*fallback, code.T_NEAR);
code.L(*end);
code.SwitchToFarCode();
code.L(fallback);
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
code.L(*fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipStepFused<FPT>);
code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipStepFused<FPT>);
code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
return;
@ -1034,8 +1038,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm();
[[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
Xbyak::Label fallback, bad_values, end, default_nan;
bool needs_fallback = false;
SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();
code.movaps(value, operand);
@ -1045,7 +1048,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
// Detect NaNs, negatives, zeros, denormals and infinities
FCODE(ucomis)(value, code.XmmBConst<fsize>(xword, FPT(1) << FP::FPInfo<FPT>::explicit_mantissa_width));
code.jna(bad_values, code.T_NEAR);
code.jna(*bad_values, code.T_NEAR);
FCODE(sqrts)(value, value);
ICODE(mov)(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 1>()));
@ -1054,109 +1057,110 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
ICODE(padd)(result, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00004000 : 0x0000'0800'0000'0000));
code.pand(result, xmm0);
code.L(end);
code.L(*end);
code.SwitchToFarCode();
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
Xbyak::Label fallback, default_nan;
bool needs_fallback = false;
code.L(bad_values);
if constexpr (fsize == 32) {
code.movd(tmp, operand);
code.L(*bad_values);
if constexpr (fsize == 32) {
code.movd(tmp, operand);
if (!ctx.FPCR().FZ()) {
if (ctx.FPCR().DN()) {
// a > 0x80000000
code.cmp(tmp, 0x80000000);
code.ja(default_nan, code.T_NEAR);
}
// a > 0 && a < 0x00800000;
code.sub(tmp, 1);
code.cmp(tmp, 0x007FFFFF);
code.jb(fallback);
needs_fallback = true;
}
code.rsqrtss(result, operand);
if (!ctx.FPCR().FZ()) {
if (ctx.FPCR().DN()) {
// a > 0x80000000
code.cmp(tmp, 0x80000000);
code.ja(default_nan, code.T_NEAR);
}
// a > 0 && a < 0x00800000;
code.sub(tmp, 1);
code.cmp(tmp, 0x007FFFFF);
code.jb(fallback);
needs_fallback = true;
}
code.rsqrtss(result, operand);
if (ctx.FPCR().DN()) {
code.ucomiss(result, result);
code.jnp(end, code.T_NEAR);
} else {
// FZ ? (a >= 0x80800000 && a <= 0xFF800000) : (a >= 0x80000001 && a <= 0xFF800000)
// !FZ path takes into account the subtraction by one from the earlier block
code.add(tmp, ctx.FPCR().FZ() ? 0x7F800000 : 0x80000000);
code.cmp(tmp, ctx.FPCR().FZ() ? 0x7F000001 : 0x7F800000);
code.jnb(end, code.T_NEAR);
}
code.L(default_nan);
code.movd(result, code.XmmBConst<32>(xword, 0x7FC00000));
code.jmp(end, code.T_NEAR);
} else {
Xbyak::Label nan, zero;
code.movaps(value, operand);
DenormalsAreZero<fsize>(code, ctx, {value});
code.pxor(result, result);
code.ucomisd(value, result);
if (ctx.FPCR().DN()) {
code.jc(default_nan);
code.je(zero);
} else {
code.jp(nan);
code.je(zero);
code.jc(default_nan);
}
if (!ctx.FPCR().FZ()) {
needs_fallback = true;
code.jmp(fallback);
} else {
// result = 0
code.jmp(end, code.T_NEAR);
}
code.L(zero);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpor(result, value, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
} else {
code.movaps(result, value);
code.por(result, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
}
code.jmp(end, code.T_NEAR);
code.L(nan);
if (!ctx.FPCR().DN()) {
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpor(result, operand, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
code.ucomiss(result, result);
code.jnp(*end, code.T_NEAR);
} else {
code.movaps(result, operand);
code.por(result, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
// FZ ? (a >= 0x80800000 && a <= 0xFF800000) : (a >= 0x80000001 && a <= 0xFF800000)
// !FZ path takes into account the subtraction by one from the earlier block
code.add(tmp, ctx.FPCR().FZ() ? 0x7F800000 : 0x80000000);
code.cmp(tmp, ctx.FPCR().FZ() ? 0x7F000001 : 0x7F800000);
code.jnb(*end, code.T_NEAR);
}
code.jmp(end, code.T_NEAR);
code.L(default_nan);
code.movd(result, code.XmmBConst<32>(xword, 0x7FC00000));
code.jmp(*end, code.T_NEAR);
} else {
Xbyak::Label nan, zero;
code.movaps(value, operand);
DenormalsAreZero<fsize>(code, ctx, {value});
code.pxor(result, result);
code.ucomisd(value, result);
if (ctx.FPCR().DN()) {
code.jc(default_nan);
code.je(zero);
} else {
code.jp(nan);
code.je(zero);
code.jc(default_nan);
}
if (!ctx.FPCR().FZ()) {
needs_fallback = true;
code.jmp(fallback);
} else {
// result = 0
code.jmp(*end, code.T_NEAR);
}
code.L(zero);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpor(result, value, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
} else {
code.movaps(result, value);
code.por(result, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
}
code.jmp(*end, code.T_NEAR);
code.L(nan);
if (!ctx.FPCR().DN()) {
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpor(result, operand, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
} else {
code.movaps(result, operand);
code.por(result, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
}
code.jmp(*end, code.T_NEAR);
}
code.L(default_nan);
code.movq(result, code.XmmBConst<64>(xword, 0x7FF8'0000'0000'0000));
code.jmp(*end, code.T_NEAR);
}
code.L(default_nan);
code.movq(result, code.XmmBConst<64>(xword, 0x7FF8'0000'0000'0000));
code.jmp(end, code.T_NEAR);
}
code.L(fallback);
if (needs_fallback) {
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
code.movq(result, rax);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(end, code.T_NEAR);
}
code.SwitchToNearCode();
code.L(fallback);
if (needs_fallback) {
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
code.movq(result, rax);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(*end, code.T_NEAR);
}
});
ctx.reg_alloc.DefineValue(inst, result);
} else {
@ -1201,7 +1205,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
}
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
Xbyak::Label end, fallback;
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@ -1220,27 +1224,27 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.cmp(tmp.cvt16(), fsize == 32 ? 0x7f00 : 0x7fe0);
ctx.reg_alloc.Release(tmp);
code.jae(fallback, code.T_NEAR);
code.jae(*fallback, code.T_NEAR);
FCODE(vmuls)(result, result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, -1, 1>()));
code.L(end);
code.L(*end);
code.SwitchToFarCode();
code.L(fallback);
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
code.L(*fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
return;
@ -1528,22 +1532,22 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if constexpr (isize == 64) {
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
Xbyak::Label saturate_max, end;
if (!unsigned_) {
SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel();
ZeroIfNaN<64>(code, src, scratch);
code.movsd(scratch, code.XmmBConst<64>(xword, f64_max_s64_lim));
code.comisd(scratch, src);
code.jna(saturate_max, code.T_NEAR);
code.jna(*saturate_max, code.T_NEAR);
code.cvttsd2si(result, src); // 64 bit gpr
code.L(end);
code.L(*end);
code.SwitchToFarCode();
code.L(saturate_max);
code.mov(result, 0x7FFF'FFFF'FFFF'FFFF);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, &code] {
code.L(*saturate_max);
code.mov(result, 0x7FFF'FFFF'FFFF'FFFF);
code.jmp(*end, code.T_NEAR);
});
} else {
Xbyak::Label below_max;

View file

@ -52,26 +52,27 @@ FakeCall AxxEmitX64::FastmemCallback(u64 rip_) {
template<std::size_t bitsize, auto callback>
void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = IsOrdered(args[1].GetImmediateAccType());
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
const auto fastmem_marker = ShouldFastmem(ctx, inst);
if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) {
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(nullptr, {}, args[1]);
if (ordered) {
code.mfence();
}
code.CallFunction(memory_read_128);
ctx.reg_alloc.DefineValue(inst, xmm1);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[0]);
ctx.reg_alloc.HostCall(inst, {}, args[1]);
if (ordered) {
code.mfence();
}
Devirtualize<callback>(conf.callbacks).EmitCall(code);
code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
}
EmitCheckMemoryAbort(ctx, inst);
return;
}
@ -83,44 +84,50 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
Xbyak::Label abort, end;
bool require_abort_handling = false;
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
if (fastmem_marker) {
// Use fastmem
const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
bool require_abort_handling;
const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
const auto location = EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
fastmem_patch_info.emplace(
mcl::bit_cast<u64>(location),
FastmemPatchInfo{
mcl::bit_cast<u64>(code.getCurr()),
mcl::bit_cast<u64>(wrapped_fn),
*fastmem_marker,
conf.recompile_on_fastmem_failure,
});
ctx.deferred_emits.emplace_back([=, this, &ctx] {
code.L(*abort);
code.call(wrapped_fn);
fastmem_patch_info.emplace(
mcl::bit_cast<u64>(location),
FastmemPatchInfo{
mcl::bit_cast<u64>(code.getCurr()),
mcl::bit_cast<u64>(wrapped_fn),
*fastmem_marker,
conf.recompile_on_fastmem_failure,
});
EmitCheckMemoryAbort(ctx, inst, end.get());
code.jmp(*end, code.T_NEAR);
});
} else {
// Use page table
ASSERT(conf.page_table);
const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
require_abort_handling = true;
const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, *abort, vaddr);
EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
}
code.L(end);
if (require_abort_handling) {
code.SwitchToFarCode();
code.L(abort);
code.call(wrapped_fn);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, this, &ctx] {
code.L(*abort);
code.call(wrapped_fn);
EmitCheckMemoryAbort(ctx, inst, end.get());
code.jmp(*end, code.T_NEAR);
});
}
code.L(*end);
if constexpr (bitsize == 128) {
ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
@ -132,24 +139,25 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
template<std::size_t bitsize, auto callback>
void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
const auto fastmem_marker = ShouldFastmem(ctx, inst);
if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[0], ABI_PARAM2);
ctx.reg_alloc.Use(args[1], HostLoc::XMM1);
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
code.CallFunction(memory_write_128);
} else {
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]);
Devirtualize<callback>(conf.callbacks).EmitCall(code);
}
if (ordered) {
code.mfence();
}
EmitCheckMemoryAbort(ctx, inst);
return;
}
@ -161,58 +169,64 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128
? ctx.reg_alloc.UseXmm(args[1]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(args[1]).getIdx() : ctx.reg_alloc.UseGpr(args[1]).getIdx());
? ctx.reg_alloc.UseXmm(args[2]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(args[2]).getIdx() : ctx.reg_alloc.UseGpr(args[2]).getIdx());
const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
Xbyak::Label abort, end;
bool require_abort_handling = false;
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
if (fastmem_marker) {
// Use fastmem
const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
bool require_abort_handling;
const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
const auto location = EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx, ordered);
fastmem_patch_info.emplace(
mcl::bit_cast<u64>(location),
FastmemPatchInfo{
mcl::bit_cast<u64>(code.getCurr()),
mcl::bit_cast<u64>(wrapped_fn),
*fastmem_marker,
conf.recompile_on_fastmem_failure,
});
ctx.deferred_emits.emplace_back([=, this, &ctx] {
code.L(*abort);
code.call(wrapped_fn);
fastmem_patch_info.emplace(
mcl::bit_cast<u64>(location),
FastmemPatchInfo{
mcl::bit_cast<u64>(code.getCurr()),
mcl::bit_cast<u64>(wrapped_fn),
*fastmem_marker,
conf.recompile_on_fastmem_failure,
});
EmitCheckMemoryAbort(ctx, inst, end.get());
code.jmp(*end, code.T_NEAR);
});
} else {
// Use page table
ASSERT(conf.page_table);
const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
require_abort_handling = true;
const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, *abort, vaddr);
EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx, ordered);
}
code.L(end);
if (require_abort_handling) {
code.SwitchToFarCode();
code.L(abort);
code.call(wrapped_fn);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, this, &ctx] {
code.L(*abort);
code.call(wrapped_fn);
EmitCheckMemoryAbort(ctx, inst, end.get());
code.jmp(*end, code.T_NEAR);
});
}
code.L(*end);
}
template<std::size_t bitsize, auto callback>
void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ASSERT(conf.global_monitor != nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = IsOrdered(args[1].GetImmediateAccType());
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>;
ctx.reg_alloc.HostCall(inst, {}, args[0]);
ctx.reg_alloc.HostCall(inst, {}, args[1]);
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
@ -228,7 +242,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
} else {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
ctx.reg_alloc.Use(args[0], ABI_PARAM2);
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
@ -250,19 +264,21 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
EmitCheckMemoryAbort(ctx, inst);
}
template<std::size_t bitsize, auto callback>
void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ASSERT(conf.global_monitor != nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
if constexpr (bitsize != 128) {
ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]);
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
} else {
ctx.reg_alloc.Use(args[0], ABI_PARAM2);
ctx.reg_alloc.Use(args[1], HostLoc::XMM1);
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(inst);
}
@ -308,6 +324,8 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
}
code.L(end);
EmitCheckMemoryAbort(ctx, inst);
}
template<std::size_t bitsize, auto callback>
@ -329,7 +347,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr();
@ -344,10 +362,10 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
const auto fastmem_marker = ShouldFastmem(ctx, inst);
if (fastmem_marker) {
Xbyak::Label abort, end;
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
bool require_abort_handling = false;
const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling);
const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
const auto location = EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
@ -360,14 +378,14 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
conf.recompile_on_exclusive_fastmem_failure,
});
code.L(end);
code.L(*end);
if (require_abort_handling) {
code.SwitchToFarCode();
code.L(abort);
code.call(wrapped_fn);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, this] {
code.L(*abort);
code.call(wrapped_fn);
code.jmp(*end, code.T_NEAR);
});
}
} else {
code.call(wrapped_fn);
@ -383,6 +401,8 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
} else {
ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
}
EmitCheckMemoryAbort(ctx, inst);
}
template<std::size_t bitsize, auto callback>
@ -402,13 +422,13 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
return ctx.reg_alloc.UseXmm(args[1]);
return ctx.reg_alloc.UseXmm(args[2]);
} else {
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
return ctx.reg_alloc.UseGpr(args[1]);
return ctx.reg_alloc.UseGpr(args[2]);
}
}();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
@ -416,14 +436,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
EmitExclusiveLock(code, conf, tmp, eax);
Xbyak::Label end;
SharedLabel end = GenSharedLabel();
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(status, u32(1));
code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.je(end, code.T_NEAR);
code.je(*end, code.T_NEAR);
code.cmp(qword[tmp], vaddr);
code.jne(end, code.T_NEAR);
code.jne(*end, code.T_NEAR);
EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax);
@ -448,10 +468,10 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const auto fastmem_marker = ShouldFastmem(ctx, inst);
if (fastmem_marker) {
Xbyak::Label abort;
SharedLabel abort = GenSharedLabel();
bool require_abort_handling = false;
const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling, tmp);
const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling, tmp);
const auto location = code.getCurr();
@ -483,24 +503,24 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
code.setnz(status.cvt8());
code.SwitchToFarCode();
code.L(abort);
code.call(wrapped_fn);
ctx.deferred_emits.emplace_back([=, this] {
code.L(*abort);
code.call(wrapped_fn);
fastmem_patch_info.emplace(
mcl::bit_cast<u64>(location),
FastmemPatchInfo{
mcl::bit_cast<u64>(code.getCurr()),
mcl::bit_cast<u64>(wrapped_fn),
*fastmem_marker,
conf.recompile_on_exclusive_fastmem_failure,
});
fastmem_patch_info.emplace(
mcl::bit_cast<u64>(location),
FastmemPatchInfo{
mcl::bit_cast<u64>(code.getCurr()),
mcl::bit_cast<u64>(wrapped_fn),
*fastmem_marker,
conf.recompile_on_exclusive_fastmem_failure,
});
code.cmp(al, 0);
code.setz(status.cvt8());
code.movzx(status.cvt32(), status.cvt8());
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
code.cmp(al, 0);
code.setz(status.cvt8());
code.movzx(status.cvt32(), status.cvt8());
code.jmp(*end, code.T_NEAR);
});
} else {
code.call(wrapped_fn);
code.cmp(al, 0);
@ -508,11 +528,13 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
code.movzx(status.cvt32(), status.cvt8());
}
code.L(end);
code.L(*end);
EmitExclusiveUnlock(code, conf, tmp, eax);
ctx.reg_alloc.DefineValue(inst, status);
EmitCheckMemoryAbort(ctx, inst);
}
#undef AxxEmitX64

View file

@ -53,19 +53,19 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;
Xbyak::Label detect_boundary, resume;
SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
code.jnz(detect_boundary, code.T_NEAR);
code.L(resume);
code.jnz(*detect_boundary, code.T_NEAR);
code.L(*resume);
code.SwitchToFarCode();
code.L(detect_boundary);
code.mov(tmp, vaddr);
code.and_(tmp, page_align_mask);
code.cmp(tmp, page_align_mask);
code.jne(resume, code.T_NEAR);
// NOTE: We expect to fallthrough into abort code here.
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, &code] {
code.L(*detect_boundary);
code.mov(tmp, vaddr);
code.and_(tmp, page_align_mask);
code.cmp(tmp, page_align_mask);
code.jne(*resume, code.T_NEAR);
// NOTE: We expect to fallthrough into abort code here.
});
}
template<typename EmitContext>

View file

@ -112,36 +112,35 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
code.cmp(bitmask, 0);
}
Xbyak::Label end;
Xbyak::Label nan;
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
code.jnz(nan, code.T_NEAR);
code.L(end);
code.jnz(*nan, code.T_NEAR);
code.L(*end);
code.SwitchToFarCode();
code.L(nan);
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
code.L(*nan);
const Xbyak::Xmm result = xmms[0];
const Xbyak::Xmm result = xmms[0];
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
const size_t stack_space = xmms.size() * 16;
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
for (size_t i = 0; i < xmms.size(); ++i) {
code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], xmms[i]);
}
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.mov(code.ABI_PARAM2, ctx.FPCR(fpcr_controlled).Value());
const size_t stack_space = xmms.size() * 16;
code.sub(rsp, static_cast<u32>(stack_space + ABI_SHADOW_SPACE));
for (size_t i = 0; i < xmms.size(); ++i) {
code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], xmms[i]);
}
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.mov(code.ABI_PARAM2, ctx.FPCR(fpcr_controlled).Value());
code.CallFunction(nan_handler);
code.CallFunction(nan_handler);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.add(rsp, static_cast<u32>(stack_space + ABI_SHADOW_SPACE));
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(*end, code.T_NEAR);
});
}
template<size_t fsize>
@ -1117,7 +1116,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
Xbyak::Label end, fallback;
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, xmm_a);
@ -1127,19 +1126,19 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.andnps(tmp, result);
FCODE(vcmpeq_uqp)(tmp, tmp, GetSmallestNormalVector<fsize>(code));
code.vptest(tmp, tmp);
code.jnz(fallback, code.T_NEAR);
code.L(end);
code.jnz(*fallback, code.T_NEAR);
code.L(*end);
});
code.SwitchToFarCode();
code.L(fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, xmm_a, xmm_b, xmm_c, fallback_fn, fpcr_controlled);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
code.L(*fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, xmm_a, xmm_b, xmm_c, fallback_fn, fpcr_controlled);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
return;
@ -1377,7 +1376,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
Xbyak::Label end, fallback;
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
@ -1385,19 +1384,19 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
FCODE(vcmpunordp)(tmp, result, result);
code.vptest(tmp, tmp);
code.jnz(fallback, code.T_NEAR);
code.L(end);
code.jnz(*fallback, code.T_NEAR);
code.L(*end);
});
code.SwitchToFarCode();
code.L(fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
code.L(*fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
return;
@ -1591,7 +1590,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
Xbyak::Label end, fallback;
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
@ -1602,21 +1601,21 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
FCODE(vandp)(tmp, result, mask);
ICODE(vpcmpeq)(tmp, tmp, mask);
code.ptest(tmp, tmp);
code.jnz(fallback, code.T_NEAR);
code.jnz(*fallback, code.T_NEAR);
FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));
code.L(end);
code.L(*end);
});
code.SwitchToFarCode();
code.L(fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.deferred_emits.emplace_back([=, &code, &ctx] {
code.L(*fallback);
code.sub(rsp, 8);
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.add(rsp, 8);
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
return;

View file

@ -69,6 +69,10 @@ std::string DisassembleAArch32([[maybe_unused]] bool is_thumb, [[maybe_unused]]
char buffer[1024];
while (length) {
size_t inst_size = LLVMDisasmInstruction(llvm_ctx, const_cast<u8*>(instructions), length, pc, buffer, sizeof(buffer));
const char* const disassembled = inst_size > 0 ? buffer : "<invalid instruction>";
if (inst_size == 0)
inst_size = is_thumb ? 2 : 4;
result += fmt::format("{:08x} ", pc);
for (size_t i = 0; i < 4; i++) {
@ -78,11 +82,9 @@ std::string DisassembleAArch32([[maybe_unused]] bool is_thumb, [[maybe_unused]]
result += " ";
}
}
result += inst_size > 0 ? buffer : "<invalid instruction>";
result += disassembled;
result += '\n';
if (inst_size == 0)
inst_size = is_thumb ? 2 : 4;
if (length <= inst_size)
break;
@ -111,7 +113,8 @@ std::string DisassembleAArch64([[maybe_unused]] u32 instruction, [[maybe_unused]
char buffer[80];
size_t inst_size = LLVMDisasmInstruction(llvm_ctx, (u8*)&instruction, sizeof(instruction), pc, buffer, sizeof(buffer));
result = inst_size > 0 ? buffer : "<invalid instruction>";
result = fmt::format("{:016x} {:08x} ", pc, instruction);
result += inst_size > 0 ? buffer : "<invalid instruction>";
result += '\n';
LLVMDisasmDispose(llvm_ctx);

View file

@ -245,40 +245,40 @@ IR::UAny IREmitter::ReadMemory(size_t bitsize, const IR::U32& vaddr, IR::AccType
}
// Emits an A32ReadMemory8 IR instruction for `vaddr` with access type `acc_type`
// and returns its 8-bit result.
// NOTE(review): the two near-identical returns below look like the old and new
// variants of one statement from a diff whose +/- markers were stripped; the
// second one additionally passes ImmCurrentLocationDescriptor() as the first
// operand. Confirm against the real file.
IR::U8 IREmitter::ReadMemory8(const IR::U32& vaddr, IR::AccType acc_type) {
return Inst<IR::U8>(Opcode::A32ReadMemory8, vaddr, IR::Value{acc_type});
return Inst<IR::U8>(Opcode::A32ReadMemory8, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
// Emits an A32ReadMemory16 IR instruction for `vaddr` and returns the loaded
// halfword, byte-reversed when current_location.EFlag() is set (presumably the
// big-endian data flag — confirm).
// NOTE(review): the two `value` definitions look like the old and new variants
// of one statement from a diff with stripped +/- markers; the second adds
// ImmCurrentLocationDescriptor() as the first operand.
IR::U16 IREmitter::ReadMemory16(const IR::U32& vaddr, IR::AccType acc_type) {
const auto value = Inst<IR::U16>(Opcode::A32ReadMemory16, vaddr, IR::Value{acc_type});
const auto value = Inst<IR::U16>(Opcode::A32ReadMemory16, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
return current_location.EFlag() ? ByteReverseHalf(value) : value;
}
// Emits an A32ReadMemory32 IR instruction for `vaddr` and returns the loaded
// word, byte-reversed when current_location.EFlag() is set (presumably the
// big-endian data flag — confirm).
// NOTE(review): the two `value` definitions look like the old and new variants
// of one statement from a diff with stripped +/- markers; the second adds
// ImmCurrentLocationDescriptor() as the first operand.
IR::U32 IREmitter::ReadMemory32(const IR::U32& vaddr, IR::AccType acc_type) {
const auto value = Inst<IR::U32>(Opcode::A32ReadMemory32, vaddr, IR::Value{acc_type});
const auto value = Inst<IR::U32>(Opcode::A32ReadMemory32, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
return current_location.EFlag() ? ByteReverseWord(value) : value;
}
// Emits an A32ReadMemory64 IR instruction for `vaddr` and returns the loaded
// doubleword, byte-reversed when current_location.EFlag() is set (presumably
// the big-endian data flag — confirm).
// NOTE(review): the two `value` definitions look like the old and new variants
// of one statement from a diff with stripped +/- markers; the second adds
// ImmCurrentLocationDescriptor() as the first operand.
IR::U64 IREmitter::ReadMemory64(const IR::U32& vaddr, IR::AccType acc_type) {
const auto value = Inst<IR::U64>(Opcode::A32ReadMemory64, vaddr, IR::Value{acc_type});
const auto value = Inst<IR::U64>(Opcode::A32ReadMemory64, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
return current_location.EFlag() ? ByteReverseDual(value) : value;
}
// Emits an A32ExclusiveReadMemory8 IR instruction for `vaddr` with access type
// `acc_type` and returns its 8-bit result.
// NOTE(review): the two near-identical returns look like the old and new
// variants of one statement from a diff with stripped +/- markers; the second
// adds ImmCurrentLocationDescriptor() as the first operand.
IR::U8 IREmitter::ExclusiveReadMemory8(const IR::U32& vaddr, IR::AccType acc_type) {
return Inst<IR::U8>(Opcode::A32ExclusiveReadMemory8, vaddr, IR::Value{acc_type});
return Inst<IR::U8>(Opcode::A32ExclusiveReadMemory8, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
// Emits an A32ExclusiveReadMemory16 IR instruction for `vaddr` and returns the
// loaded halfword, byte-reversed when current_location.EFlag() is set
// (presumably the big-endian data flag — confirm).
// NOTE(review): the two `value` definitions look like the old and new variants
// of one statement from a diff with stripped +/- markers; the second adds
// ImmCurrentLocationDescriptor() as the first operand.
IR::U16 IREmitter::ExclusiveReadMemory16(const IR::U32& vaddr, IR::AccType acc_type) {
const auto value = Inst<IR::U16>(Opcode::A32ExclusiveReadMemory16, vaddr, IR::Value{acc_type});
const auto value = Inst<IR::U16>(Opcode::A32ExclusiveReadMemory16, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
return current_location.EFlag() ? ByteReverseHalf(value) : value;
}
// Emits an A32ExclusiveReadMemory32 IR instruction for `vaddr` and returns the
// loaded word, byte-reversed when current_location.EFlag() is set (presumably
// the big-endian data flag — confirm).
// NOTE(review): the two `value` definitions look like the old and new variants
// of one statement from a diff with stripped +/- markers; the second adds
// ImmCurrentLocationDescriptor() as the first operand.
IR::U32 IREmitter::ExclusiveReadMemory32(const IR::U32& vaddr, IR::AccType acc_type) {
const auto value = Inst<IR::U32>(Opcode::A32ExclusiveReadMemory32, vaddr, IR::Value{acc_type});
const auto value = Inst<IR::U32>(Opcode::A32ExclusiveReadMemory32, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
return current_location.EFlag() ? ByteReverseWord(value) : value;
}
std::pair<IR::U32, IR::U32> IREmitter::ExclusiveReadMemory64(const IR::U32& vaddr, IR::AccType acc_type) {
const auto value = Inst<IR::U64>(Opcode::A32ExclusiveReadMemory64, vaddr, IR::Value{acc_type});
const auto value = Inst<IR::U64>(Opcode::A32ExclusiveReadMemory64, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
const auto lo = LeastSignificantWord(value);
const auto hi = MostSignificantWord(value).result;
if (current_location.EFlag()) {
@ -303,55 +303,55 @@ void IREmitter::WriteMemory(size_t bitsize, const IR::U32& vaddr, const IR::UAny
}
// Emits an A32WriteMemory8 IR instruction storing `value` at `vaddr` with
// access type `acc_type`. A single byte needs no byte reversal.
// NOTE(review): the two near-identical Inst(...) lines look like the old and
// new variants of one statement from a diff with stripped +/- markers; the
// second adds ImmCurrentLocationDescriptor() as the first operand.
void IREmitter::WriteMemory8(const IR::U32& vaddr, const IR::U8& value, IR::AccType acc_type) {
Inst(Opcode::A32WriteMemory8, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A32WriteMemory8, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
// Emits an A32WriteMemory16 IR instruction storing `value` at `vaddr`.
// When current_location.EFlag() is set (presumably the big-endian data flag —
// confirm) the halfword is byte-reversed before it is stored.
// NOTE(review): within each branch the two near-identical Inst(...) lines look
// like the old and new variants of one statement from a diff with stripped +/-
// markers; the second adds ImmCurrentLocationDescriptor() as the first operand.
void IREmitter::WriteMemory16(const IR::U32& vaddr, const IR::U16& value, IR::AccType acc_type) {
if (current_location.EFlag()) {
const auto v = ByteReverseHalf(value);
Inst(Opcode::A32WriteMemory16, vaddr, v, IR::Value{acc_type});
Inst(Opcode::A32WriteMemory16, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
} else {
Inst(Opcode::A32WriteMemory16, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A32WriteMemory16, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
}
// Emits an A32WriteMemory32 IR instruction storing `value` at `vaddr`.
// When current_location.EFlag() is set (presumably the big-endian data flag —
// confirm) the word is byte-reversed before it is stored.
// NOTE(review): within each branch the two near-identical Inst(...) lines look
// like the old and new variants of one statement from a diff with stripped +/-
// markers; the second adds ImmCurrentLocationDescriptor() as the first operand.
void IREmitter::WriteMemory32(const IR::U32& vaddr, const IR::U32& value, IR::AccType acc_type) {
if (current_location.EFlag()) {
const auto v = ByteReverseWord(value);
Inst(Opcode::A32WriteMemory32, vaddr, v, IR::Value{acc_type});
Inst(Opcode::A32WriteMemory32, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
} else {
Inst(Opcode::A32WriteMemory32, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A32WriteMemory32, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
}
// Emits an A32WriteMemory64 IR instruction storing `value` at `vaddr`.
// When current_location.EFlag() is set (presumably the big-endian data flag —
// confirm) the doubleword is byte-reversed before it is stored.
// NOTE(review): within each branch the two near-identical Inst(...) lines look
// like the old and new variants of one statement from a diff with stripped +/-
// markers; the second adds ImmCurrentLocationDescriptor() as the first operand.
void IREmitter::WriteMemory64(const IR::U32& vaddr, const IR::U64& value, IR::AccType acc_type) {
if (current_location.EFlag()) {
const auto v = ByteReverseDual(value);
Inst(Opcode::A32WriteMemory64, vaddr, v, IR::Value{acc_type});
Inst(Opcode::A32WriteMemory64, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
} else {
Inst(Opcode::A32WriteMemory64, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A32WriteMemory64, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
}
// Emits an A32ExclusiveWriteMemory8 IR instruction storing `value` at `vaddr`
// and returns the instruction's 32-bit result (callers in this file store it
// in a register named `passed`).
// NOTE(review): the two near-identical returns look like the old and new
// variants of one statement from a diff with stripped +/- markers; the second
// adds ImmCurrentLocationDescriptor() as the first operand.
IR::U32 IREmitter::ExclusiveWriteMemory8(const IR::U32& vaddr, const IR::U8& value, IR::AccType acc_type) {
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory8, vaddr, value, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory8, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
// Emits an A32ExclusiveWriteMemory16 IR instruction storing `value` at `vaddr`
// and returns the instruction's 32-bit result. When current_location.EFlag()
// is set (presumably the big-endian data flag — confirm) the halfword is
// byte-reversed before it is stored.
// NOTE(review): within each branch the two near-identical returns look like
// the old and new variants of one statement from a diff with stripped +/-
// markers; the second adds ImmCurrentLocationDescriptor() as the first operand.
IR::U32 IREmitter::ExclusiveWriteMemory16(const IR::U32& vaddr, const IR::U16& value, IR::AccType acc_type) {
if (current_location.EFlag()) {
const auto v = ByteReverseHalf(value);
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory16, vaddr, v, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory16, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
} else {
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory16, vaddr, value, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory16, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
}
// Emits an A32ExclusiveWriteMemory32 IR instruction storing `value` at `vaddr`
// and returns the instruction's 32-bit result. When current_location.EFlag()
// is set (presumably the big-endian data flag — confirm) the word is
// byte-reversed before it is stored.
// NOTE(review): within each branch the two near-identical returns look like
// the old and new variants of one statement from a diff with stripped +/-
// markers; the second adds ImmCurrentLocationDescriptor() as the first operand.
IR::U32 IREmitter::ExclusiveWriteMemory32(const IR::U32& vaddr, const IR::U32& value, IR::AccType acc_type) {
if (current_location.EFlag()) {
const auto v = ByteReverseWord(value);
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory32, vaddr, v, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory32, ImmCurrentLocationDescriptor(), vaddr, v, IR::Value{acc_type});
} else {
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory32, vaddr, value, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory32, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
}
@ -359,9 +359,9 @@ IR::U32 IREmitter::ExclusiveWriteMemory64(const IR::U32& vaddr, const IR::U32& v
if (current_location.EFlag()) {
const auto vlo = ByteReverseWord(value_lo);
const auto vhi = ByteReverseWord(value_hi);
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory64, vaddr, Pack2x32To1x64(vlo, vhi), IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory64, ImmCurrentLocationDescriptor(), vaddr, Pack2x32To1x64(vlo, vhi), IR::Value{acc_type});
} else {
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory64, vaddr, Pack2x32To1x64(value_lo, value_hi), IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A32ExclusiveWriteMemory64, ImmCurrentLocationDescriptor(), vaddr, Pack2x32To1x64(value_lo, value_hi), IR::Value{acc_type});
}
}
@ -439,4 +439,8 @@ void IREmitter::CoprocStoreWords(size_t coproc_no, bool two, bool long_transfer,
Inst(Opcode::A32CoprocStoreWords, IR::Value(coproc_info), address);
}
// Wraps the current location in a generic IR::LocationDescriptor and returns
// its Value() as a 64-bit IR immediate.
IR::U64 IREmitter::ImmCurrentLocationDescriptor() {
    const IR::LocationDescriptor descriptor{current_location};
    return Imm64(descriptor.Value());
}
} // namespace Dynarmic::A32

View file

@ -110,6 +110,7 @@ public:
private:
enum ArchVersion arch_version;
IR::U64 ImmCurrentLocationDescriptor();
};
} // namespace Dynarmic::A32

View file

@ -29,12 +29,6 @@ struct TranslationOptions {
/// If this is false, we treat the instruction as a NOP.
/// If this is true, we emit an ExceptionRaised instruction.
bool hook_hint_instructions = true;
/// This changes what IR we emit when we translate a memory instruction.
/// If this is false, memory accesses are not considered terminal.
/// If this is true, memory access are considered terminal. This allows
/// accurately emulating protection fault handlers.
bool check_halt_on_memory_access = false;
};
/**

View file

@ -53,15 +53,6 @@ bool TranslatorVisitor::RaiseException(Exception exception) {
return false;
}
bool TranslatorVisitor::MemoryInstructionContinues() {
if (options.check_halt_on_memory_access) {
ir.SetTerm(IR::Term::LinkBlock{ir.current_location.AdvancePC(static_cast<s32>(current_instruction_size))});
return false;
}
return true;
}
IR::UAny TranslatorVisitor::I(size_t bitsize, u64 value) {
switch (bitsize) {
case 8:

View file

@ -41,7 +41,6 @@ struct TranslatorVisitor final {
bool UndefinedInstruction();
bool DecodeError();
bool RaiseException(Exception exception);
bool MemoryInstructionContinues();
struct ImmAndCarry {
u32 imm32;

View file

@ -119,7 +119,7 @@ bool TranslatorVisitor::v8_VST_multiple(bool D, Reg n, size_t Vd, Imm<4> type, s
}
}
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t size, size_t align, Reg m) {
@ -176,7 +176,7 @@ bool TranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, s
}
}
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::v8_VLD_all_lanes(bool D, Reg n, size_t Vd, size_t nn, size_t sz, bool T, bool a, Reg m) {
@ -241,7 +241,7 @@ bool TranslatorVisitor::v8_VLD_all_lanes(bool D, Reg n, size_t Vd, size_t nn, si
}
}
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::v8_VST_single(bool D, Reg n, size_t Vd, size_t sz, size_t nn, size_t index_align, Reg m) {
@ -305,7 +305,7 @@ bool TranslatorVisitor::v8_VST_single(bool D, Reg n, size_t Vd, size_t sz, size_
}
}
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::v8_VLD_single(bool D, Reg n, size_t Vd, size_t sz, size_t nn, size_t index_align, Reg m) {
@ -370,6 +370,6 @@ bool TranslatorVisitor::v8_VLD_single(bool D, Reg n, size_t Vd, size_t sz, size_
}
}
return MemoryInstructionContinues();
return true;
}
} // namespace Dynarmic::A32

View file

@ -83,7 +83,7 @@ bool TranslatorVisitor::arm_LDR_lit(Cond cond, bool U, Reg t, Imm<12> imm12) {
}
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDR <Rt>, [<Rn>, #+/-<imm>]{!}
@ -120,7 +120,7 @@ bool TranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re
}
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDR <Rt>, [<Rn>, +/-<Rm>]{!}
@ -150,7 +150,7 @@ bool TranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re
}
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRB <Rt>, [PC, #+/-<imm>]
@ -170,7 +170,7 @@ bool TranslatorVisitor::arm_LDRB_lit(Cond cond, bool U, Reg t, Imm<12> imm12) {
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(ir.Imm32(address), IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRB <Rt>, [<Rn>, #+/-<imm>]{!}
@ -199,7 +199,7 @@ bool TranslatorVisitor::arm_LDRB_imm(Cond cond, bool P, bool U, bool W, Reg n, R
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRB <Rt>, [<Rn>, +/-<Rm>]{!}
@ -223,7 +223,7 @@ bool TranslatorVisitor::arm_LDRB_reg(Cond cond, bool P, bool U, bool W, Reg n, R
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRD <Rt>, <Rt2>, [PC, #+/-<imm>]
@ -257,7 +257,7 @@ bool TranslatorVisitor::arm_LDRD_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Imm
ir.SetRegister(t, ir.LeastSignificantWord(data));
ir.SetRegister(t2, ir.MostSignificantWord(data).result);
}
return MemoryInstructionContinues();
return true;
}
// LDRD <Rt>, <Rt2>, [<Rn>, #+/-<imm>]{!}
@ -303,7 +303,7 @@ bool TranslatorVisitor::arm_LDRD_imm(Cond cond, bool P, bool U, bool W, Reg n, R
ir.SetRegister(t, ir.LeastSignificantWord(data));
ir.SetRegister(t2, ir.MostSignificantWord(data).result);
}
return MemoryInstructionContinues();
return true;
}
// LDRD <Rt>, <Rt2>, [<Rn>, +/-<Rm>]{!}
@ -343,7 +343,7 @@ bool TranslatorVisitor::arm_LDRD_reg(Cond cond, bool P, bool U, bool W, Reg n, R
ir.SetRegister(t, ir.LeastSignificantWord(data));
ir.SetRegister(t2, ir.MostSignificantWord(data).result);
}
return MemoryInstructionContinues();
return true;
}
// LDRH <Rt>, [PC, #-/+<imm>]
@ -368,7 +368,7 @@ bool TranslatorVisitor::arm_LDRH_lit(Cond cond, bool P, bool U, bool W, Reg t, I
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(ir.Imm32(address), IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRH <Rt>, [<Rn>, #+/-<imm>]{!}
@ -397,7 +397,7 @@ bool TranslatorVisitor::arm_LDRH_imm(Cond cond, bool P, bool U, bool W, Reg n, R
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRH <Rt>, [<Rn>, +/-<Rm>]{!}
@ -421,7 +421,7 @@ bool TranslatorVisitor::arm_LDRH_reg(Cond cond, bool P, bool U, bool W, Reg n, R
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRSB <Rt>, [PC, #+/-<imm>]
@ -442,7 +442,7 @@ bool TranslatorVisitor::arm_LDRSB_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Im
const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(ir.Imm32(address), IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRSB <Rt>, [<Rn>, #+/-<imm>]{!}
@ -471,7 +471,7 @@ bool TranslatorVisitor::arm_LDRSB_imm(Cond cond, bool P, bool U, bool W, Reg n,
const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRSB <Rt>, [<Rn>, +/-<Rm>]{!}
@ -495,7 +495,7 @@ bool TranslatorVisitor::arm_LDRSB_reg(Cond cond, bool P, bool U, bool W, Reg n,
const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRSH <Rt>, [PC, #-/+<imm>]
@ -515,7 +515,7 @@ bool TranslatorVisitor::arm_LDRSH_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Im
const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(ir.Imm32(address), IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRSH <Rt>, [<Rn>, #+/-<imm>]{!}
@ -544,7 +544,7 @@ bool TranslatorVisitor::arm_LDRSH_imm(Cond cond, bool P, bool U, bool W, Reg n,
const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRSH <Rt>, [<Rn>, +/-<Rm>]{!}
@ -568,7 +568,7 @@ bool TranslatorVisitor::arm_LDRSH_reg(Cond cond, bool P, bool U, bool W, Reg n,
const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// STR <Rt>, [<Rn>, #+/-<imm>]{!}
@ -585,7 +585,7 @@ bool TranslatorVisitor::arm_STR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re
const auto offset = ir.Imm32(imm12.ZeroExtend());
const auto address = GetAddress(ir, P, U, W, n, offset);
ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// STR <Rt>, [<Rn>, +/-<Rm>]{!}
@ -606,7 +606,7 @@ bool TranslatorVisitor::arm_STR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re
const auto offset = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag()).result;
const auto address = GetAddress(ir, P, U, W, n, offset);
ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// STRB <Rt>, [<Rn>, #+/-<imm>]{!}
@ -627,7 +627,7 @@ bool TranslatorVisitor::arm_STRB_imm(Cond cond, bool P, bool U, bool W, Reg n, R
const auto offset = ir.Imm32(imm12.ZeroExtend());
const auto address = GetAddress(ir, P, U, W, n, offset);
ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// STRB <Rt>, [<Rn>, +/-<Rm>]{!}
@ -648,7 +648,7 @@ bool TranslatorVisitor::arm_STRB_reg(Cond cond, bool P, bool U, bool W, Reg n, R
const auto offset = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag()).result;
const auto address = GetAddress(ir, P, U, W, n, offset);
ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// STRD <Rt>, <Rt2>, [<Rn>, #+/-<imm>]{!}
@ -686,7 +686,7 @@ bool TranslatorVisitor::arm_STRD_imm(Cond cond, bool P, bool U, bool W, Reg n, R
// NOTE: If alignment is exactly off by 4, each word is an atomic access.
ir.WriteMemory64(address, data, IR::AccType::ATOMIC);
return MemoryInstructionContinues();
return true;
}
// STRD <Rt>, <Rt2>, [<Rn>, +/-<Rm>]{!}
@ -723,7 +723,7 @@ bool TranslatorVisitor::arm_STRD_reg(Cond cond, bool P, bool U, bool W, Reg n, R
// NOTE: If alignment is exactly off by 4, each word is an atomic access.
ir.WriteMemory64(address, data, IR::AccType::ATOMIC);
return MemoryInstructionContinues();
return true;
}
// STRH <Rt>, [<Rn>, #+/-<imm>]{!}
@ -746,7 +746,7 @@ bool TranslatorVisitor::arm_STRH_imm(Cond cond, bool P, bool U, bool W, Reg n, R
const auto address = GetAddress(ir, P, U, W, n, offset);
ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// STRH <Rt>, [<Rn>, +/-<Rm>]{!}
@ -768,31 +768,29 @@ bool TranslatorVisitor::arm_STRH_reg(Cond cond, bool P, bool U, bool W, Reg n, R
const auto address = GetAddress(ir, P, U, W, n, offset);
ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// Shared body of the LDM-family translators (load multiple registers).
//
// Starting at `start_address`, loads one word per register R0..R14 whose bit is
// set in `list`, advancing the address by 4 after each load. If `W` is set and
// the base register `n` is not itself in `list`, `n` is updated to
// `writeback_address`. If bit 15 of `list` is set, the next word is loaded into
// the PC via LoadWritePC, a block terminal is set, and false is returned.
//
// NOTE(review): this listing looks like a diff whose +/- markers were stripped.
// There are two signature lines, and each `v.ir.`/`v.` statement is followed by
// an `ir.` counterpart. The `v`-based lines are presumably the old variant
// (taking the whole TranslatorVisitor) and the `ir`-based lines the new one
// (taking only the IREmitter) — confirm against the real file.
static bool LDMHelper(TranslatorVisitor& v, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
static bool LDMHelper(A32::IREmitter& ir, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
auto address = start_address;
// R15 (PC) is deliberately excluded from this loop; it is handled separately below.
for (size_t i = 0; i <= 14; i++) {
if (mcl::bit::get_bit(i, list)) {
v.ir.SetRegister(static_cast<Reg>(i), v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
address = v.ir.Add(address, v.ir.Imm32(4));
ir.SetRegister(static_cast<Reg>(i), ir.ReadMemory32(address, IR::AccType::ATOMIC));
address = ir.Add(address, ir.Imm32(4));
}
}
// Writeback is skipped when the base register was one of the loaded registers.
if (W && !mcl::bit::get_bit(RegNumber(n), list)) {
v.ir.SetRegister(n, writeback_address);
ir.SetRegister(n, writeback_address);
}
if (mcl::bit::get_bit<15>(list)) {
v.ir.LoadWritePC(v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
if (v.options.check_halt_on_memory_access)
v.ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
else if (n == Reg::R13)
v.ir.SetTerm(IR::Term::PopRSBHint{});
ir.LoadWritePC(ir.ReadMemory32(address, IR::AccType::ATOMIC));
// Base register R13 selects the PopRSBHint terminal (presumably because a
// load through SP into PC is a function return — confirm); any other
// base register selects FastDispatchHint.
if (n == Reg::R13)
ir.SetTerm(IR::Term::PopRSBHint{});
else
v.ir.SetTerm(IR::Term::FastDispatchHint{});
ir.SetTerm(IR::Term::FastDispatchHint{});
// PC was written, so the caller must stop translating this block.
return false;
}
return v.MemoryInstructionContinues();
return true;
}
// LDM <Rn>{!}, <reg_list>
@ -810,7 +808,7 @@ bool TranslatorVisitor::arm_LDM(Cond cond, bool W, Reg n, RegList list) {
const auto start_address = ir.GetRegister(n);
const auto writeback_address = ir.Add(start_address, ir.Imm32(u32(mcl::bit::count_ones(list) * 4)));
return LDMHelper(*this, W, n, list, start_address, writeback_address);
return LDMHelper(ir, W, n, list, start_address, writeback_address);
}
// LDMDA <Rn>{!}, <reg_list>
@ -828,7 +826,7 @@ bool TranslatorVisitor::arm_LDMDA(Cond cond, bool W, Reg n, RegList list) {
const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list) - 4)));
const auto writeback_address = ir.Sub(start_address, ir.Imm32(4));
return LDMHelper(*this, W, n, list, start_address, writeback_address);
return LDMHelper(ir, W, n, list, start_address, writeback_address);
}
// LDMDB <Rn>{!}, <reg_list>
@ -846,7 +844,7 @@ bool TranslatorVisitor::arm_LDMDB(Cond cond, bool W, Reg n, RegList list) {
const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
const auto writeback_address = start_address;
return LDMHelper(*this, W, n, list, start_address, writeback_address);
return LDMHelper(ir, W, n, list, start_address, writeback_address);
}
// LDMIB <Rn>{!}, <reg_list>
@ -864,7 +862,7 @@ bool TranslatorVisitor::arm_LDMIB(Cond cond, bool W, Reg n, RegList list) {
const auto start_address = ir.Add(ir.GetRegister(n), ir.Imm32(4));
const auto writeback_address = ir.Add(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
return LDMHelper(*this, W, n, list, start_address, writeback_address);
return LDMHelper(ir, W, n, list, start_address, writeback_address);
}
bool TranslatorVisitor::arm_LDM_usr() {
@ -875,21 +873,21 @@ bool TranslatorVisitor::arm_LDM_eret() {
return InterpretThisInstruction();
}
// Shared body of the STM-family translators (store multiple registers).
//
// Starting at `start_address`, stores one word per register R0..R14 whose bit
// is set in `list`, advancing the address by 4 after each store. If `W` is set,
// the base register `n` is updated to `writeback_address`. If bit 15 of `list`
// is set, the value of ir.PC() is stored as an immediate at the next address.
//
// NOTE(review): this listing looks like a diff whose +/- markers were stripped.
// There are two signature lines, and each `v.ir.`/`v.` statement is followed by
// an `ir.` counterpart. The `v`-based lines are presumably the old variant
// (taking the whole TranslatorVisitor) and the `ir`-based lines the new one
// (taking only the IREmitter) — confirm against the real file.
static bool STMHelper(TranslatorVisitor& v, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
static bool STMHelper(A32::IREmitter& ir, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
auto address = start_address;
// R15 (PC) is excluded from this loop; it is stored separately below.
for (size_t i = 0; i <= 14; i++) {
if (mcl::bit::get_bit(i, list)) {
v.ir.WriteMemory32(address, v.ir.GetRegister(static_cast<Reg>(i)), IR::AccType::ATOMIC);
address = v.ir.Add(address, v.ir.Imm32(4));
ir.WriteMemory32(address, ir.GetRegister(static_cast<Reg>(i)), IR::AccType::ATOMIC);
address = ir.Add(address, ir.Imm32(4));
}
}
if (W) {
v.ir.SetRegister(n, writeback_address);
ir.SetRegister(n, writeback_address);
}
if (mcl::bit::get_bit<15>(list)) {
v.ir.WriteMemory32(address, v.ir.Imm32(v.ir.PC()), IR::AccType::ATOMIC);
ir.WriteMemory32(address, ir.Imm32(ir.PC()), IR::AccType::ATOMIC);
}
return v.MemoryInstructionContinues();
return true;
}
// STM <Rn>{!}, <reg_list>
@ -904,7 +902,7 @@ bool TranslatorVisitor::arm_STM(Cond cond, bool W, Reg n, RegList list) {
const auto start_address = ir.GetRegister(n);
const auto writeback_address = ir.Add(start_address, ir.Imm32(u32(mcl::bit::count_ones(list) * 4)));
return STMHelper(*this, W, n, list, start_address, writeback_address);
return STMHelper(ir, W, n, list, start_address, writeback_address);
}
// STMDA <Rn>{!}, <reg_list>
@ -919,7 +917,7 @@ bool TranslatorVisitor::arm_STMDA(Cond cond, bool W, Reg n, RegList list) {
const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list) - 4)));
const auto writeback_address = ir.Sub(start_address, ir.Imm32(4));
return STMHelper(*this, W, n, list, start_address, writeback_address);
return STMHelper(ir, W, n, list, start_address, writeback_address);
}
// STMDB <Rn>{!}, <reg_list>
@ -934,7 +932,7 @@ bool TranslatorVisitor::arm_STMDB(Cond cond, bool W, Reg n, RegList list) {
const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
const auto writeback_address = start_address;
return STMHelper(*this, W, n, list, start_address, writeback_address);
return STMHelper(ir, W, n, list, start_address, writeback_address);
}
// STMIB <Rn>{!}, <reg_list>
@ -949,7 +947,7 @@ bool TranslatorVisitor::arm_STMIB(Cond cond, bool W, Reg n, RegList list) {
const auto start_address = ir.Add(ir.GetRegister(n), ir.Imm32(4));
const auto writeback_address = ir.Add(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
return STMHelper(*this, W, n, list, start_address, writeback_address);
return STMHelper(ir, W, n, list, start_address, writeback_address);
}
bool TranslatorVisitor::arm_STM_usr() {

View file

@ -29,7 +29,7 @@ bool TranslatorVisitor::arm_SWP(Cond cond, Reg n, Reg t, Reg t2) {
ir.WriteMemory32(ir.GetRegister(n), ir.GetRegister(t2), IR::AccType::SWAP);
// TODO: Alignment check
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// SWPB<c> <Rt>, <Rt2>, [<Rn>]
@ -48,7 +48,7 @@ bool TranslatorVisitor::arm_SWPB(Cond cond, Reg n, Reg t, Reg t2) {
ir.WriteMemory8(ir.GetRegister(n), ir.LeastSignificantByte(ir.GetRegister(t2)), IR::AccType::SWAP);
// TODO: Alignment check
ir.SetRegister(t, ir.ZeroExtendByteToWord(data));
return MemoryInstructionContinues();
return true;
}
// LDA<c> <Rt>, [<Rn>]
@ -63,7 +63,7 @@ bool TranslatorVisitor::arm_LDA(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ReadMemory32(address, IR::AccType::ORDERED));
return MemoryInstructionContinues();
return true;
}
// LDAB<c> <Rt>, [<Rn>]
bool TranslatorVisitor::arm_LDAB(Cond cond, Reg n, Reg t) {
@ -77,7 +77,7 @@ bool TranslatorVisitor::arm_LDAB(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory8(address, IR::AccType::ORDERED)));
return MemoryInstructionContinues();
return true;
}
// LDAH<c> <Rt>, [<Rn>]
bool TranslatorVisitor::arm_LDAH(Cond cond, Reg n, Reg t) {
@ -91,7 +91,7 @@ bool TranslatorVisitor::arm_LDAH(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory16(address, IR::AccType::ORDERED)));
return MemoryInstructionContinues();
return true;
}
// LDAEX<c> <Rt>, [<Rn>]
@ -106,7 +106,7 @@ bool TranslatorVisitor::arm_LDAEX(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ExclusiveReadMemory32(address, IR::AccType::ORDERED));
return MemoryInstructionContinues();
return true;
}
// LDAEXB<c> <Rt>, [<Rn>]
@ -121,7 +121,7 @@ bool TranslatorVisitor::arm_LDAEXB(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address, IR::AccType::ORDERED)));
return MemoryInstructionContinues();
return true;
}
// LDAEXD<c> <Rt>, <Rt2>, [<Rn>]
@ -139,7 +139,7 @@ bool TranslatorVisitor::arm_LDAEXD(Cond cond, Reg n, Reg t) {
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
ir.SetRegister(t, lo);
ir.SetRegister(t + 1, hi);
return MemoryInstructionContinues();
return true;
}
// LDAEXH<c> <Rt>, [<Rn>]
@ -154,7 +154,7 @@ bool TranslatorVisitor::arm_LDAEXH(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address, IR::AccType::ORDERED)));
return MemoryInstructionContinues();
return true;
}
// STL<c> <Rt>, [<Rn>]
@ -169,7 +169,7 @@ bool TranslatorVisitor::arm_STL(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::ORDERED);
return MemoryInstructionContinues();
return true;
}
// STLB<c> <Rt>, [<Rn>]
@ -184,7 +184,7 @@ bool TranslatorVisitor::arm_STLB(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::ORDERED);
return MemoryInstructionContinues();
return true;
}
// STLH<c> <Rt>, [<Rn>]
@ -199,7 +199,7 @@ bool TranslatorVisitor::arm_STLH(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::ORDERED);
return MemoryInstructionContinues();
return true;
}
// STLEXB<c> <Rd>, <Rt>, [<Rn>]
@ -220,7 +220,7 @@ bool TranslatorVisitor::arm_STLEXB(Cond cond, Reg n, Reg d, Reg t) {
const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
const auto passed = ir.ExclusiveWriteMemory8(address, value, IR::AccType::ORDERED);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
// STLEXD<c> <Rd>, <Rt>, <Rt2>, [<Rn>]
bool TranslatorVisitor::arm_STLEXD(Cond cond, Reg n, Reg d, Reg t) {
@ -242,7 +242,7 @@ bool TranslatorVisitor::arm_STLEXD(Cond cond, Reg n, Reg d, Reg t) {
const auto value_hi = ir.GetRegister(t2);
const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi, IR::AccType::ORDERED);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
// STLEXH<c> <Rd>, <Rt>, [<Rn>]
@ -263,7 +263,7 @@ bool TranslatorVisitor::arm_STLEXH(Cond cond, Reg n, Reg d, Reg t) {
const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
const auto passed = ir.ExclusiveWriteMemory16(address, value, IR::AccType::ORDERED);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
// STLEX<c> <Rd>, <Rt>, [<Rn>]
@ -284,7 +284,7 @@ bool TranslatorVisitor::arm_STLEX(Cond cond, Reg n, Reg d, Reg t) {
const auto value = ir.GetRegister(t);
const auto passed = ir.ExclusiveWriteMemory32(address, value, IR::AccType::ORDERED);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
// LDREX<c> <Rt>, [<Rn>]
@ -299,7 +299,7 @@ bool TranslatorVisitor::arm_LDREX(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ExclusiveReadMemory32(address, IR::AccType::ATOMIC));
return MemoryInstructionContinues();
return true;
}
// LDREXB<c> <Rt>, [<Rn>]
@ -314,7 +314,7 @@ bool TranslatorVisitor::arm_LDREXB(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address, IR::AccType::ATOMIC)));
return MemoryInstructionContinues();
return true;
}
// LDREXD<c> <Rt>, <Rt2>, [<Rn>]
@ -332,7 +332,7 @@ bool TranslatorVisitor::arm_LDREXD(Cond cond, Reg n, Reg t) {
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
ir.SetRegister(t, lo);
ir.SetRegister(t + 1, hi);
return MemoryInstructionContinues();
return true;
}
// LDREXH<c> <Rt>, [<Rn>]
@ -347,7 +347,7 @@ bool TranslatorVisitor::arm_LDREXH(Cond cond, Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address, IR::AccType::ATOMIC)));
return MemoryInstructionContinues();
return true;
}
// STREX<c> <Rd>, <Rt>, [<Rn>]
@ -368,7 +368,7 @@ bool TranslatorVisitor::arm_STREX(Cond cond, Reg n, Reg d, Reg t) {
const auto value = ir.GetRegister(t);
const auto passed = ir.ExclusiveWriteMemory32(address, value, IR::AccType::ATOMIC);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
// STREXB<c> <Rd>, <Rt>, [<Rn>]
@ -389,7 +389,7 @@ bool TranslatorVisitor::arm_STREXB(Cond cond, Reg n, Reg d, Reg t) {
const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
const auto passed = ir.ExclusiveWriteMemory8(address, value, IR::AccType::ATOMIC);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
// STREXD<c> <Rd>, <Rt>, <Rt2>, [<Rn>]
@ -412,7 +412,7 @@ bool TranslatorVisitor::arm_STREXD(Cond cond, Reg n, Reg d, Reg t) {
const auto value_hi = ir.GetRegister(t2);
const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi, IR::AccType::ATOMIC);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
// STREXH<c> <Rd>, <Rt>, [<Rn>]
@ -433,7 +433,7 @@ bool TranslatorVisitor::arm_STREXH(Cond cond, Reg n, Reg d, Reg t) {
const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
const auto passed = ir.ExclusiveWriteMemory16(address, value, IR::AccType::ATOMIC);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
} // namespace Dynarmic::A32

View file

@ -449,7 +449,7 @@ bool TranslatorVisitor::thumb16_LDR_literal(Reg t, Imm<8> imm8) {
const auto data = ir.ReadMemory32(ir.Imm32(address), IR::AccType::NORMAL);
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// STR <Rt>, [<Rn>, <Rm>]
@ -459,7 +459,7 @@ bool TranslatorVisitor::thumb16_STR_reg(Reg m, Reg n, Reg t) {
const auto data = ir.GetRegister(t);
ir.WriteMemory32(address, data, IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// STRH <Rt>, [<Rn>, <Rm>]
@ -469,7 +469,7 @@ bool TranslatorVisitor::thumb16_STRH_reg(Reg m, Reg n, Reg t) {
const auto data = ir.LeastSignificantHalf(ir.GetRegister(t));
ir.WriteMemory16(address, data, IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// STRB <Rt>, [<Rn>, <Rm>]
@ -479,7 +479,7 @@ bool TranslatorVisitor::thumb16_STRB_reg(Reg m, Reg n, Reg t) {
const auto data = ir.LeastSignificantByte(ir.GetRegister(t));
ir.WriteMemory8(address, data, IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// LDRSB <Rt>, [<Rn>, <Rm>]
@ -489,7 +489,7 @@ bool TranslatorVisitor::thumb16_LDRSB_reg(Reg m, Reg n, Reg t) {
const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDR <Rt>, [<Rn>, <Rm>]
@ -499,7 +499,7 @@ bool TranslatorVisitor::thumb16_LDR_reg(Reg m, Reg n, Reg t) {
const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRH <Rt>, [<Rn>, <Rm>]
@ -509,7 +509,7 @@ bool TranslatorVisitor::thumb16_LDRH_reg(Reg m, Reg n, Reg t) {
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRB <Rt>, [<Rn>, <Rm>]
@ -519,7 +519,7 @@ bool TranslatorVisitor::thumb16_LDRB_reg(Reg m, Reg n, Reg t) {
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// LDRSH <Rt>, [<Rn>, <Rm>]
@ -529,7 +529,7 @@ bool TranslatorVisitor::thumb16_LDRSH_reg(Reg m, Reg n, Reg t) {
const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// STR <Rt>, [<Rn>, #<imm>]
@ -540,7 +540,7 @@ bool TranslatorVisitor::thumb16_STR_imm_t1(Imm<5> imm5, Reg n, Reg t) {
const auto data = ir.GetRegister(t);
ir.WriteMemory32(address, data, IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// LDR <Rt>, [<Rn>, #<imm>]
@ -551,7 +551,7 @@ bool TranslatorVisitor::thumb16_LDR_imm_t1(Imm<5> imm5, Reg n, Reg t) {
const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// STRB <Rt>, [<Rn>, #<imm>]
@ -573,7 +573,7 @@ bool TranslatorVisitor::thumb16_LDRB_imm(Imm<5> imm5, Reg n, Reg t) {
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// STRH <Rt>, [<Rn>, #<imm5>]
@ -583,7 +583,7 @@ bool TranslatorVisitor::thumb16_STRH_imm(Imm<5> imm5, Reg n, Reg t) {
const auto data = ir.LeastSignificantHalf(ir.GetRegister(t));
ir.WriteMemory16(address, data, IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// LDRH <Rt>, [<Rn>, #<imm5>]
@ -593,7 +593,7 @@ bool TranslatorVisitor::thumb16_LDRH_imm(Imm<5> imm5, Reg n, Reg t) {
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// STR <Rt>, [<Rn>, #<imm>]
@ -605,7 +605,7 @@ bool TranslatorVisitor::thumb16_STR_imm_t2(Reg t, Imm<8> imm8) {
const auto data = ir.GetRegister(t);
ir.WriteMemory32(address, data, IR::AccType::NORMAL);
return MemoryInstructionContinues();
return true;
}
// LDR <Rt>, [<Rn>, #<imm>]
@ -617,7 +617,7 @@ bool TranslatorVisitor::thumb16_LDR_imm_t2(Reg t, Imm<8> imm8) {
const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
// ADR <Rd>, <label>
@ -775,7 +775,7 @@ bool TranslatorVisitor::thumb16_PUSH(bool M, RegList reg_list) {
ir.SetRegister(Reg::SP, final_address);
// TODO(optimization): Possible location for an RSB push.
return MemoryInstructionContinues();
return true;
}
// POP <reg_list>
@ -804,15 +804,11 @@ bool TranslatorVisitor::thumb16_POP(bool P, RegList reg_list) {
ir.LoadWritePC(data);
address = ir.Add(address, ir.Imm32(4));
ir.SetRegister(Reg::SP, address);
if (options.check_halt_on_memory_access) {
ir.SetTerm(IR::Term::CheckHalt{IR::Term::PopRSBHint{}});
} else {
ir.SetTerm(IR::Term::PopRSBHint{});
}
ir.SetTerm(IR::Term::PopRSBHint{});
return false;
} else {
ir.SetRegister(Reg::SP, address);
return MemoryInstructionContinues();
return true;
}
}
@ -891,7 +887,7 @@ bool TranslatorVisitor::thumb16_STMIA(Reg n, RegList reg_list) {
}
ir.SetRegister(n, address);
return MemoryInstructionContinues();
return true;
}
// LDM <Rn>!, <reg_list>
@ -914,7 +910,7 @@ bool TranslatorVisitor::thumb16_LDMIA(Reg n, RegList reg_list) {
if (write_back) {
ir.SetRegister(n, address);
}
return MemoryInstructionContinues();
return true;
}
// CB{N}Z <Rn>, <label>

View file

@ -34,7 +34,7 @@ static bool LoadByteLiteral(TranslatorVisitor& v, bool U, Reg t, Imm<12> imm12,
const auto data = (v.ir.*ext_fn)(v.ir.ReadMemory8(v.ir.Imm32(address), IR::AccType::NORMAL));
v.ir.SetRegister(t, data);
return v.MemoryInstructionContinues();
return true;
}
static bool LoadByteRegister(TranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Reg m, ExtensionFunction ext_fn) {
@ -49,7 +49,7 @@ static bool LoadByteRegister(TranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Re
const auto data = (v.ir.*ext_fn)(v.ir.ReadMemory8(address, IR::AccType::NORMAL));
v.ir.SetRegister(t, data);
return v.MemoryInstructionContinues();
return true;
}
static bool LoadByteImmediate(TranslatorVisitor& v, Reg n, Reg t, bool P, bool U, bool W, Imm<12> imm12, ExtensionFunction ext_fn) {
@ -64,7 +64,7 @@ static bool LoadByteImmediate(TranslatorVisitor& v, Reg n, Reg t, bool P, bool U
if (W) {
v.ir.SetRegister(n, offset_address);
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_PLD_lit(bool /*U*/, Imm<12> /*imm12*/) {

View file

@ -16,7 +16,7 @@ static bool LoadHalfLiteral(TranslatorVisitor& v, bool U, Reg t, Imm<12> imm12,
const auto data = (v.ir.*ext_fn)(v.ir.ReadMemory16(v.ir.Imm32(address), IR::AccType::NORMAL));
v.ir.SetRegister(t, data);
return v.MemoryInstructionContinues();
return true;
}
static bool LoadHalfRegister(TranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Reg m, ExtensionFunction ext_fn) {
@ -31,7 +31,7 @@ static bool LoadHalfRegister(TranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Re
const IR::U32 data = (v.ir.*ext_fn)(v.ir.ReadMemory16(address, IR::AccType::NORMAL));
v.ir.SetRegister(t, data);
return v.MemoryInstructionContinues();
return true;
}
static bool LoadHalfImmediate(TranslatorVisitor& v, Reg n, Reg t, bool P, bool U, bool W, Imm<12> imm12, ExtensionFunction ext_fn) {
@ -48,7 +48,7 @@ static bool LoadHalfImmediate(TranslatorVisitor& v, Reg n, Reg t, bool P, bool U
}
v.ir.SetRegister(t, data);
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDRH_lit(bool U, Reg t, Imm<12> imm12) {

View file

@ -36,11 +36,7 @@ static bool TableBranch(TranslatorVisitor& v, Reg n, Reg m, bool half) {
v.ir.UpdateUpperLocationDescriptor();
v.ir.BranchWritePC(branch_value);
if (v.options.check_halt_on_memory_access) {
v.ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
} else {
v.ir.SetTerm(IR::Term::FastDispatchHint{});
}
v.ir.SetTerm(IR::Term::FastDispatchHint{});
return false;
}
@ -72,7 +68,7 @@ static bool LoadDualImmediate(TranslatorVisitor& v, bool P, bool U, bool W, Reg
if (W) {
v.ir.SetRegister(n, offset_address);
}
return v.MemoryInstructionContinues();
return true;
}
static bool LoadDualLiteral(TranslatorVisitor& v, bool U, bool W, Reg t, Reg t2, Imm<8> imm8) {
@ -98,7 +94,7 @@ static bool LoadDualLiteral(TranslatorVisitor& v, bool U, bool W, Reg t, Reg t2,
v.ir.SetRegister(t2, v.ir.MostSignificantWord(data).result);
}
return v.MemoryInstructionContinues();
return true;
}
static bool StoreDual(TranslatorVisitor& v, bool P, bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8) {
@ -127,7 +123,7 @@ static bool StoreDual(TranslatorVisitor& v, bool P, bool U, bool W, Reg n, Reg t
if (W) {
v.ir.SetRegister(n, offset_address);
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDA(Reg n, Reg t) {
@ -173,7 +169,7 @@ bool TranslatorVisitor::thumb32_LDREX(Reg n, Reg t, Imm<8> imm8) {
const auto value = ir.ExclusiveReadMemory32(address, IR::AccType::ATOMIC);
ir.SetRegister(t, value);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDREXB(Reg n, Reg t) {
@ -185,7 +181,7 @@ bool TranslatorVisitor::thumb32_LDREXB(Reg n, Reg t) {
const auto value = ir.ZeroExtendToWord(ir.ExclusiveReadMemory8(address, IR::AccType::ATOMIC));
ir.SetRegister(t, value);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDREXD(Reg n, Reg t, Reg t2) {
@ -199,7 +195,7 @@ bool TranslatorVisitor::thumb32_LDREXD(Reg n, Reg t, Reg t2) {
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
ir.SetRegister(t, lo);
ir.SetRegister(t2, hi);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDREXH(Reg n, Reg t) {
@ -211,7 +207,7 @@ bool TranslatorVisitor::thumb32_LDREXH(Reg n, Reg t) {
const auto value = ir.ZeroExtendToWord(ir.ExclusiveReadMemory16(address, IR::AccType::ATOMIC));
ir.SetRegister(t, value);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_STL(Reg n, Reg t) {
@ -221,7 +217,7 @@ bool TranslatorVisitor::thumb32_STL(Reg n, Reg t) {
const auto address = ir.GetRegister(n);
ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::ORDERED);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_STREX(Reg n, Reg t, Reg d, Imm<8> imm8) {
@ -236,7 +232,7 @@ bool TranslatorVisitor::thumb32_STREX(Reg n, Reg t, Reg d, Imm<8> imm8) {
const auto value = ir.GetRegister(t);
const auto passed = ir.ExclusiveWriteMemory32(address, value, IR::AccType::ATOMIC);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_STREXB(Reg n, Reg t, Reg d) {
@ -251,7 +247,7 @@ bool TranslatorVisitor::thumb32_STREXB(Reg n, Reg t, Reg d) {
const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
const auto passed = ir.ExclusiveWriteMemory8(address, value, IR::AccType::ATOMIC);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_STREXD(Reg n, Reg t, Reg t2, Reg d) {
@ -267,7 +263,7 @@ bool TranslatorVisitor::thumb32_STREXD(Reg n, Reg t, Reg t2, Reg d) {
const auto value_hi = ir.GetRegister(t2);
const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi, IR::AccType::ATOMIC);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_STREXH(Reg n, Reg t, Reg d) {
@ -282,7 +278,7 @@ bool TranslatorVisitor::thumb32_STREXH(Reg n, Reg t, Reg d) {
const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
const auto passed = ir.ExclusiveWriteMemory16(address, value, IR::AccType::ATOMIC);
ir.SetRegister(d, passed);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_TBB(Reg n, Reg m) {

View file

@ -12,44 +12,42 @@ static bool ITBlockCheck(const A32::IREmitter& ir) {
return ir.current_location.IT().IsInITBlock() && !ir.current_location.IT().IsLastInITBlock();
}
static bool LDMHelper(TranslatorVisitor& v, bool W, Reg n, u32 list, const IR::U32& start_address, const IR::U32& writeback_address) {
static bool LDMHelper(A32::IREmitter& ir, bool W, Reg n, u32 list, const IR::U32& start_address, const IR::U32& writeback_address) {
auto address = start_address;
for (size_t i = 0; i <= 14; i++) {
if (mcl::bit::get_bit(i, list)) {
v.ir.SetRegister(static_cast<Reg>(i), v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
address = v.ir.Add(address, v.ir.Imm32(4));
ir.SetRegister(static_cast<Reg>(i), ir.ReadMemory32(address, IR::AccType::ATOMIC));
address = ir.Add(address, ir.Imm32(4));
}
}
if (W && !mcl::bit::get_bit(RegNumber(n), list)) {
v.ir.SetRegister(n, writeback_address);
ir.SetRegister(n, writeback_address);
}
if (mcl::bit::get_bit<15>(list)) {
v.ir.UpdateUpperLocationDescriptor();
v.ir.LoadWritePC(v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
if (v.options.check_halt_on_memory_access) {
v.ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
} else if (n == Reg::R13) {
v.ir.SetTerm(IR::Term::PopRSBHint{});
ir.UpdateUpperLocationDescriptor();
ir.LoadWritePC(ir.ReadMemory32(address, IR::AccType::ATOMIC));
if (n == Reg::R13) {
ir.SetTerm(IR::Term::PopRSBHint{});
} else {
v.ir.SetTerm(IR::Term::FastDispatchHint{});
ir.SetTerm(IR::Term::FastDispatchHint{});
}
return false;
}
return v.MemoryInstructionContinues();
return true;
}
static bool STMHelper(TranslatorVisitor& v, bool W, Reg n, u32 list, const IR::U32& start_address, const IR::U32& writeback_address) {
static bool STMHelper(A32::IREmitter& ir, bool W, Reg n, u32 list, const IR::U32& start_address, const IR::U32& writeback_address) {
auto address = start_address;
for (size_t i = 0; i <= 14; i++) {
if (mcl::bit::get_bit(i, list)) {
v.ir.WriteMemory32(address, v.ir.GetRegister(static_cast<Reg>(i)), IR::AccType::ATOMIC);
address = v.ir.Add(address, v.ir.Imm32(4));
ir.WriteMemory32(address, ir.GetRegister(static_cast<Reg>(i)), IR::AccType::ATOMIC);
address = ir.Add(address, ir.Imm32(4));
}
}
if (W) {
v.ir.SetRegister(n, writeback_address);
ir.SetRegister(n, writeback_address);
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDMDB(bool W, Reg n, Imm<16> reg_list) {
@ -74,7 +72,7 @@ bool TranslatorVisitor::thumb32_LDMDB(bool W, Reg n, Imm<16> reg_list) {
// Start address is the same as the writeback address.
const IR::U32 start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(4 * num_regs));
return LDMHelper(*this, W, n, regs_imm, start_address, start_address);
return LDMHelper(ir, W, n, regs_imm, start_address, start_address);
}
bool TranslatorVisitor::thumb32_LDMIA(bool W, Reg n, Imm<16> reg_list) {
@ -99,7 +97,7 @@ bool TranslatorVisitor::thumb32_LDMIA(bool W, Reg n, Imm<16> reg_list) {
const auto start_address = ir.GetRegister(n);
const auto writeback_address = ir.Add(start_address, ir.Imm32(num_regs * 4));
return LDMHelper(*this, W, n, regs_imm, start_address, writeback_address);
return LDMHelper(ir, W, n, regs_imm, start_address, writeback_address);
}
bool TranslatorVisitor::thumb32_POP(Imm<16> reg_list) {
@ -126,7 +124,7 @@ bool TranslatorVisitor::thumb32_STMIA(bool W, Reg n, Imm<15> reg_list) {
const auto start_address = ir.GetRegister(n);
const auto writeback_address = ir.Add(start_address, ir.Imm32(num_regs * 4));
return STMHelper(*this, W, n, regs_imm, start_address, writeback_address);
return STMHelper(ir, W, n, regs_imm, start_address, writeback_address);
}
bool TranslatorVisitor::thumb32_STMDB(bool W, Reg n, Imm<15> reg_list) {
@ -145,7 +143,7 @@ bool TranslatorVisitor::thumb32_STMDB(bool W, Reg n, Imm<15> reg_list) {
// Start address is the same as the writeback address.
const IR::U32 start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(4 * num_regs));
return STMHelper(*this, W, n, regs_imm, start_address, start_address);
return STMHelper(ir, W, n, regs_imm, start_address, start_address);
}
} // namespace Dynarmic::A32

View file

@ -23,16 +23,12 @@ bool TranslatorVisitor::thumb32_LDR_lit(bool U, Reg t, Imm<12> imm12) {
if (t == Reg::PC) {
ir.UpdateUpperLocationDescriptor();
ir.LoadWritePC(data);
if (options.check_halt_on_memory_access) {
ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
} else {
ir.SetTerm(IR::Term::FastDispatchHint{});
}
ir.SetTerm(IR::Term::FastDispatchHint{});
return false;
}
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDR_imm8(Reg n, Reg t, bool P, bool U, bool W, Imm<8> imm8) {
@ -62,9 +58,7 @@ bool TranslatorVisitor::thumb32_LDR_imm8(Reg n, Reg t, bool P, bool U, bool W, I
ir.UpdateUpperLocationDescriptor();
ir.LoadWritePC(data);
if (options.check_halt_on_memory_access) {
ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
} else if (!P && W && n == Reg::R13) {
if (!P && W && n == Reg::R13) {
ir.SetTerm(IR::Term::PopRSBHint{});
} else {
ir.SetTerm(IR::Term::FastDispatchHint{});
@ -74,7 +68,7 @@ bool TranslatorVisitor::thumb32_LDR_imm8(Reg n, Reg t, bool P, bool U, bool W, I
}
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDR_imm12(Reg n, Reg t, Imm<12> imm12) {
@ -90,16 +84,12 @@ bool TranslatorVisitor::thumb32_LDR_imm12(Reg n, Reg t, Imm<12> imm12) {
if (t == Reg::PC) {
ir.UpdateUpperLocationDescriptor();
ir.LoadWritePC(data);
if (options.check_halt_on_memory_access) {
ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
} else {
ir.SetTerm(IR::Term::FastDispatchHint{});
}
ir.SetTerm(IR::Term::FastDispatchHint{});
return false;
}
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDR_reg(Reg n, Reg t, Imm<2> imm2, Reg m) {
@ -119,16 +109,12 @@ bool TranslatorVisitor::thumb32_LDR_reg(Reg n, Reg t, Imm<2> imm2, Reg m) {
if (t == Reg::PC) {
ir.UpdateUpperLocationDescriptor();
ir.LoadWritePC(data);
if (options.check_halt_on_memory_access) {
ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
} else {
ir.SetTerm(IR::Term::FastDispatchHint{});
}
ir.SetTerm(IR::Term::FastDispatchHint{});
return false;
}
ir.SetRegister(t, data);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::thumb32_LDRT(Reg n, Reg t, Imm<8> imm8) {

View file

@ -1201,7 +1201,7 @@ bool TranslatorVisitor::vfp_VPOP(Cond cond, bool D, size_t Vd, bool sz, Imm<8> i
}
}
return MemoryInstructionContinues();
return true;
}
// VPUSH.{F32,F64} <list>
@ -1242,7 +1242,7 @@ bool TranslatorVisitor::vfp_VPUSH(Cond cond, bool D, size_t Vd, bool sz, Imm<8>
}
}
return MemoryInstructionContinues();
return true;
}
// VLDR<c> <Dd>, [<Rn>{, #+/-<imm>}]
@ -1268,7 +1268,7 @@ bool TranslatorVisitor::vfp_VLDR(Cond cond, bool U, bool D, Reg n, size_t Vd, bo
ir.SetExtendedRegister(d, ir.ReadMemory32(address, IR::AccType::ATOMIC));
}
return MemoryInstructionContinues();
return true;
}
// VSTR<c> <Dd>, [<Rn>{, #+/-<imm>}]
@ -1295,7 +1295,7 @@ bool TranslatorVisitor::vfp_VSTR(Cond cond, bool U, bool D, Reg n, size_t Vd, bo
ir.WriteMemory32(address, ir.GetExtendedRegister(d), IR::AccType::ATOMIC);
}
return MemoryInstructionContinues();
return true;
}
// VSTM{mode}<c> <Rn>{!}, <list of double registers>
@ -1347,7 +1347,7 @@ bool TranslatorVisitor::vfp_VSTM_a1(Cond cond, bool p, bool u, bool D, bool w, R
address = ir.Add(address, ir.Imm32(4));
}
return MemoryInstructionContinues();
return true;
}
// VSTM{mode}<c> <Rn>{!}, <list of single registers>
@ -1390,7 +1390,7 @@ bool TranslatorVisitor::vfp_VSTM_a2(Cond cond, bool p, bool u, bool D, bool w, R
address = ir.Add(address, ir.Imm32(4));
}
return MemoryInstructionContinues();
return true;
}
// VLDM{mode}<c> <Rn>{!}, <list of double registers>
@ -1440,7 +1440,7 @@ bool TranslatorVisitor::vfp_VLDM_a1(Cond cond, bool p, bool u, bool D, bool w, R
ir.SetExtendedRegister(d + i, ir.Pack2x32To1x64(word1, word2));
}
return MemoryInstructionContinues();
return true;
}
// VLDM{mode}<c> <Rn>{!}, <list of single registers>
@ -1483,7 +1483,7 @@ bool TranslatorVisitor::vfp_VLDM_a2(Cond cond, bool p, bool u, bool D, bool w, R
ir.SetExtendedRegister(d + i, word);
}
return MemoryInstructionContinues();
return true;
}
} // namespace Dynarmic::A32

View file

@ -107,83 +107,83 @@ void IREmitter::ClearExclusive() {
}
IR::U8 IREmitter::ReadMemory8(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U8>(Opcode::A64ReadMemory8, vaddr, IR::Value{acc_type});
return Inst<IR::U8>(Opcode::A64ReadMemory8, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U16 IREmitter::ReadMemory16(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U16>(Opcode::A64ReadMemory16, vaddr, IR::Value{acc_type});
return Inst<IR::U16>(Opcode::A64ReadMemory16, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U32 IREmitter::ReadMemory32(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U32>(Opcode::A64ReadMemory32, vaddr, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A64ReadMemory32, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U64 IREmitter::ReadMemory64(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U64>(Opcode::A64ReadMemory64, vaddr, IR::Value{acc_type});
return Inst<IR::U64>(Opcode::A64ReadMemory64, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U128 IREmitter::ReadMemory128(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U128>(Opcode::A64ReadMemory128, vaddr, IR::Value{acc_type});
return Inst<IR::U128>(Opcode::A64ReadMemory128, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U8 IREmitter::ExclusiveReadMemory8(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U8>(Opcode::A64ExclusiveReadMemory8, vaddr, IR::Value{acc_type});
return Inst<IR::U8>(Opcode::A64ExclusiveReadMemory8, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U16 IREmitter::ExclusiveReadMemory16(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U16>(Opcode::A64ExclusiveReadMemory16, vaddr, IR::Value{acc_type});
return Inst<IR::U16>(Opcode::A64ExclusiveReadMemory16, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U32 IREmitter::ExclusiveReadMemory32(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U32>(Opcode::A64ExclusiveReadMemory32, vaddr, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A64ExclusiveReadMemory32, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U64 IREmitter::ExclusiveReadMemory64(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U64>(Opcode::A64ExclusiveReadMemory64, vaddr, IR::Value{acc_type});
return Inst<IR::U64>(Opcode::A64ExclusiveReadMemory64, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
IR::U128 IREmitter::ExclusiveReadMemory128(const IR::U64& vaddr, IR::AccType acc_type) {
return Inst<IR::U128>(Opcode::A64ExclusiveReadMemory128, vaddr, IR::Value{acc_type});
return Inst<IR::U128>(Opcode::A64ExclusiveReadMemory128, ImmCurrentLocationDescriptor(), vaddr, IR::Value{acc_type});
}
void IREmitter::WriteMemory8(const IR::U64& vaddr, const IR::U8& value, IR::AccType acc_type) {
Inst(Opcode::A64WriteMemory8, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A64WriteMemory8, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
void IREmitter::WriteMemory16(const IR::U64& vaddr, const IR::U16& value, IR::AccType acc_type) {
Inst(Opcode::A64WriteMemory16, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A64WriteMemory16, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
void IREmitter::WriteMemory32(const IR::U64& vaddr, const IR::U32& value, IR::AccType acc_type) {
Inst(Opcode::A64WriteMemory32, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A64WriteMemory32, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
void IREmitter::WriteMemory64(const IR::U64& vaddr, const IR::U64& value, IR::AccType acc_type) {
Inst(Opcode::A64WriteMemory64, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A64WriteMemory64, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
void IREmitter::WriteMemory128(const IR::U64& vaddr, const IR::U128& value, IR::AccType acc_type) {
Inst(Opcode::A64WriteMemory128, vaddr, value, IR::Value{acc_type});
Inst(Opcode::A64WriteMemory128, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
IR::U32 IREmitter::ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value, IR::AccType acc_type) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory8, vaddr, value, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory8, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
IR::U32 IREmitter::ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value, IR::AccType acc_type) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory16, vaddr, value, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory16, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
IR::U32 IREmitter::ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value, IR::AccType acc_type) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory32, vaddr, value, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory32, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
IR::U32 IREmitter::ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value, IR::AccType acc_type) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory64, vaddr, value, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory64, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
IR::U32 IREmitter::ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value, IR::AccType acc_type) {
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory128, vaddr, value, IR::Value{acc_type});
return Inst<IR::U32>(Opcode::A64ExclusiveWriteMemory128, ImmCurrentLocationDescriptor(), vaddr, value, IR::Value{acc_type});
}
IR::U32 IREmitter::GetW(Reg reg) {
@ -262,4 +262,8 @@ void IREmitter::SetPC(const IR::U64& value) {
Inst(Opcode::A64SetPC, value);
}
// Returns a 64-bit IR immediate holding the raw value of the current location
// descriptor (*current_location converted to an IR::LocationDescriptor).
IR::U64 IREmitter::ImmCurrentLocationDescriptor() {
return Imm64(IR::LocationDescriptor{*current_location}.Value());
}
} // namespace Dynarmic::A64

View file

@ -95,6 +95,9 @@ public:
void SetFPCR(const IR::U32& value);
void SetFPSR(const IR::U32& value);
void SetPC(const IR::U64& value);
private:
IR::U64 ImmCurrentLocationDescriptor();
};
} // namespace Dynarmic::A64

View file

@ -35,12 +35,6 @@ struct TranslationOptions {
/// If this is false, we treat the instruction as a NOP.
/// If this is true, we emit an ExceptionRaised instruction.
bool hook_hint_instructions = true;
/// This changes what IR we emit when we translate a memory instruction.
/// If this is false, memory accesses are not considered terminal.
/// If this is true, memory accesses are considered terminal. This allows
/// accurately emulating protection fault handlers.
bool check_halt_on_memory_access = false;
};
/**

View file

@ -41,15 +41,6 @@ bool TranslatorVisitor::RaiseException(Exception exception) {
return false;
}
// Decides whether translation continues after a memory instruction.
// When options.check_halt_on_memory_access is set, sets the block terminal to a
// LinkBlock targeting the current location with its PC advanced by 4 and returns
// false; otherwise leaves the terminal untouched and returns true.
bool TranslatorVisitor::MemoryInstructionContinues() {
if (options.check_halt_on_memory_access) {
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->AdvancePC(4)});
return false;
}
return true;
}
std::optional<TranslatorVisitor::BitMasks> TranslatorVisitor::DecodeBitMasks(bool immN, Imm<6> imms, Imm<6> immr, bool immediate) {
const int len = mcl::bit::highest_set_bit((immN ? 1 << 6 : 0) | (imms.ZeroExtend() ^ 0b111111));
if (len < 1) {

View file

@ -30,7 +30,6 @@ struct TranslatorVisitor final {
bool ReservedValue();
bool UnallocatedEncoding();
bool RaiseException(Exception exception);
bool MemoryInstructionContinues();
struct BitMasks {
u64 wmask, tmask;

View file

@ -72,7 +72,7 @@ static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& v, bool pair, s
UNREACHABLE();
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
@ -175,7 +175,7 @@ static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& v, size_t size, b
UNREACHABLE();
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STLLR(Imm<2> sz, Reg Rn, Reg Rt) {

View file

@ -15,7 +15,7 @@ bool TranslatorVisitor::LDR_lit_gen(bool opc_0, Imm<19> imm19, Reg Rt) {
const auto data = Mem(ir.Imm64(address), size, IR::AccType::NORMAL);
X(8 * size, Rt, data);
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) {
@ -33,7 +33,7 @@ bool TranslatorVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) {
} else {
V(128, Vt, ir.ZeroExtendToQuad(data));
}
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::LDRSW_lit(Imm<19> imm19, Reg Rt) {
@ -42,7 +42,7 @@ bool TranslatorVisitor::LDRSW_lit(Imm<19> imm19, Reg Rt) {
const auto data = Mem(ir.Imm64(address), 4, IR::AccType::NORMAL);
X(64, Rt, ir.SignExtendWordToLong(data));
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::PRFM_lit(Imm<19> /*imm19*/, Imm<5> /*prfop*/) {

View file

@ -104,7 +104,7 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
}
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STx_mult_1(bool Q, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt) {

View file

@ -72,7 +72,7 @@ static bool LoadStoreRegisterImmediate(TranslatorVisitor& v, bool wback, bool po
}
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STRx_LDRx_imm_1(Imm<2> size, Imm<2> opc, Imm<9> imm9, bool not_postindex, Reg Rn, Reg Rt) {
@ -165,7 +165,7 @@ static bool LoadStoreSIMD(TranslatorVisitor& v, bool wback, bool postindex, size
}
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STR_imm_fpsimd_1(Imm<2> size, Imm<1> opc_1, Imm<9> imm9, bool not_postindex, Reg Rn, Vec Vt) {

View file

@ -78,7 +78,7 @@ bool TranslatorVisitor::STP_LDP_gen(Imm<2> opc, bool not_postindex, bool wback,
}
}
return MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STP_LDP_fpsimd(Imm<2> opc, bool not_postindex, bool wback, Imm<1> L, Imm<7> imm7, Vec Vt2, Reg Rn, Vec Vt) {
@ -148,7 +148,7 @@ bool TranslatorVisitor::STP_LDP_fpsimd(Imm<2> opc, bool not_postindex, bool wbac
}
}
return MemoryInstructionContinues();
return true;
}
} // namespace Dynarmic::A64

View file

@ -70,7 +70,7 @@ static bool RegSharedDecodeAndOperation(TranslatorVisitor& v, size_t scale, u8 s
UNREACHABLE();
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STRx_reg(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Reg Rt) {
@ -128,7 +128,7 @@ static bool VecSharedDecodeAndOperation(TranslatorVisitor& v, size_t scale, u8 s
UNREACHABLE();
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Vec Vt) {

View file

@ -22,7 +22,7 @@ static bool StoreRegister(TranslatorVisitor& v, const size_t datasize, const Imm
const IR::UAny data = v.X(datasize, Rt);
v.Mem(address, datasize / 8, acctype, data);
return v.MemoryInstructionContinues();
return true;
}
static bool LoadRegister(TranslatorVisitor& v, const size_t datasize, const Imm<9> imm9, const Reg Rn, const Reg Rt) {
@ -42,7 +42,7 @@ static bool LoadRegister(TranslatorVisitor& v, const size_t datasize, const Imm<
// max is used to zeroextend < 32 to 32, and > 32 to 64
const size_t extended_size = std::max<size_t>(32, datasize);
v.X(extended_size, Rt, v.ZeroExtend(data, extended_size));
return v.MemoryInstructionContinues();
return true;
}
static bool LoadRegisterSigned(TranslatorVisitor& v, const size_t datasize, const Imm<2> opc, const Imm<9> imm9, const Reg Rn, const Reg Rt) {
@ -90,7 +90,7 @@ static bool LoadRegisterSigned(TranslatorVisitor& v, const size_t datasize, cons
// Prefetch(address, Rt);
break;
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::STTRB(Imm<9> imm9, Reg Rn, Reg Rt) {
@ -144,6 +144,6 @@ bool TranslatorVisitor::LDTRSW(Imm<9> imm9, Reg Rn, Reg Rt) {
const IR::UAny data = Mem(address, 4, acctype);
X(64, Rt, SignExtend(data, 64));
return MemoryInstructionContinues();
return true;
}
} // namespace Dynarmic::A64

View file

@ -98,7 +98,7 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
}
}
return v.MemoryInstructionContinues();
return true;
}
bool TranslatorVisitor::LD1_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {

View file

@ -229,9 +229,6 @@ struct UserConfig {
// Minimum size is about 8MiB. Maximum size is about 2GiB. Maximum size is limited by
// the maximum length of a x64 jump.
size_t code_cache_size = 256 * 1024 * 1024; // bytes
// Determines the relative size of the near and far code caches. Must be smaller than
// code_cache_size.
size_t far_code_offset = 200 * 1024 * 1024; // bytes
};
} // namespace A32

View file

@ -288,9 +288,6 @@ struct UserConfig {
// Minimum size is about 8MiB. Maximum size is about 2GiB. Maximum size is limited by
// the maximum length of a x64 jump.
size_t code_cache_size = 256 * 1024 * 1024; // bytes
// Determines the relative size of the near and far code caches. Must be smaller than
// code_cache_size.
size_t far_code_offset = 200 * 1024 * 1024; // bytes
};
} // namespace A64

View file

@ -12,6 +12,7 @@ namespace Dynarmic {
enum class HaltReason : std::uint32_t {
Step = 0x00000001,
CacheInvalidation = 0x00000002,
MemoryAbort = 0x00000004,
UserDefined1 = 0x01000000,
UserDefined2 = 0x02000000,
UserDefined3 = 0x04000000,

View file

@ -688,45 +688,45 @@ OPCODE(FPVectorToUnsignedFixed64, U128, U128
// A32 Memory access
A32OPC(ClearExclusive, Void, )
A32OPC(ReadMemory8, U8, U32, AccType )
A32OPC(ReadMemory16, U16, U32, AccType )
A32OPC(ReadMemory32, U32, U32, AccType )
A32OPC(ReadMemory64, U64, U32, AccType )
A32OPC(ExclusiveReadMemory8, U8, U32, AccType )
A32OPC(ExclusiveReadMemory16, U16, U32, AccType )
A32OPC(ExclusiveReadMemory32, U32, U32, AccType )
A32OPC(ExclusiveReadMemory64, U64, U32, AccType )
A32OPC(WriteMemory8, Void, U32, U8, AccType )
A32OPC(WriteMemory16, Void, U32, U16, AccType )
A32OPC(WriteMemory32, Void, U32, U32, AccType )
A32OPC(WriteMemory64, Void, U32, U64, AccType )
A32OPC(ExclusiveWriteMemory8, U32, U32, U8, AccType )
A32OPC(ExclusiveWriteMemory16, U32, U32, U16, AccType )
A32OPC(ExclusiveWriteMemory32, U32, U32, U32, AccType )
A32OPC(ExclusiveWriteMemory64, U32, U32, U64, AccType )
A32OPC(ReadMemory8, U8, U64, U32, AccType )
A32OPC(ReadMemory16, U16, U64, U32, AccType )
A32OPC(ReadMemory32, U32, U64, U32, AccType )
A32OPC(ReadMemory64, U64, U64, U32, AccType )
A32OPC(ExclusiveReadMemory8, U8, U64, U32, AccType )
A32OPC(ExclusiveReadMemory16, U16, U64, U32, AccType )
A32OPC(ExclusiveReadMemory32, U32, U64, U32, AccType )
A32OPC(ExclusiveReadMemory64, U64, U64, U32, AccType )
A32OPC(WriteMemory8, Void, U64, U32, U8, AccType )
A32OPC(WriteMemory16, Void, U64, U32, U16, AccType )
A32OPC(WriteMemory32, Void, U64, U32, U32, AccType )
A32OPC(WriteMemory64, Void, U64, U32, U64, AccType )
A32OPC(ExclusiveWriteMemory8, U32, U64, U32, U8, AccType )
A32OPC(ExclusiveWriteMemory16, U32, U64, U32, U16, AccType )
A32OPC(ExclusiveWriteMemory32, U32, U64, U32, U32, AccType )
A32OPC(ExclusiveWriteMemory64, U32, U64, U32, U64, AccType )
// A64 Memory access
A64OPC(ClearExclusive, Void, )
A64OPC(ReadMemory8, U8, U64, AccType )
A64OPC(ReadMemory16, U16, U64, AccType )
A64OPC(ReadMemory32, U32, U64, AccType )
A64OPC(ReadMemory64, U64, U64, AccType )
A64OPC(ReadMemory128, U128, U64, AccType )
A64OPC(ExclusiveReadMemory8, U8, U64, AccType )
A64OPC(ExclusiveReadMemory16, U16, U64, AccType )
A64OPC(ExclusiveReadMemory32, U32, U64, AccType )
A64OPC(ExclusiveReadMemory64, U64, U64, AccType )
A64OPC(ExclusiveReadMemory128, U128, U64, AccType )
A64OPC(WriteMemory8, Void, U64, U8, AccType )
A64OPC(WriteMemory16, Void, U64, U16, AccType )
A64OPC(WriteMemory32, Void, U64, U32, AccType )
A64OPC(WriteMemory64, Void, U64, U64, AccType )
A64OPC(WriteMemory128, Void, U64, U128, AccType )
A64OPC(ExclusiveWriteMemory8, U32, U64, U8, AccType )
A64OPC(ExclusiveWriteMemory16, U32, U64, U16, AccType )
A64OPC(ExclusiveWriteMemory32, U32, U64, U32, AccType )
A64OPC(ExclusiveWriteMemory64, U32, U64, U64, AccType )
A64OPC(ExclusiveWriteMemory128, U32, U64, U128, AccType )
A64OPC(ReadMemory8, U8, U64, U64, AccType )
A64OPC(ReadMemory16, U16, U64, U64, AccType )
A64OPC(ReadMemory32, U32, U64, U64, AccType )
A64OPC(ReadMemory64, U64, U64, U64, AccType )
A64OPC(ReadMemory128, U128, U64, U64, AccType )
A64OPC(ExclusiveReadMemory8, U8, U64, U64, AccType )
A64OPC(ExclusiveReadMemory16, U16, U64, U64, AccType )
A64OPC(ExclusiveReadMemory32, U32, U64, U64, AccType )
A64OPC(ExclusiveReadMemory64, U64, U64, U64, AccType )
A64OPC(ExclusiveReadMemory128, U128, U64, U64, AccType )
A64OPC(WriteMemory8, Void, U64, U64, U8, AccType )
A64OPC(WriteMemory16, Void, U64, U64, U16, AccType )
A64OPC(WriteMemory32, Void, U64, U64, U32, AccType )
A64OPC(WriteMemory64, Void, U64, U64, U64, AccType )
A64OPC(WriteMemory128, Void, U64, U64, U128, AccType )
A64OPC(ExclusiveWriteMemory8, U32, U64, U64, U8, AccType )
A64OPC(ExclusiveWriteMemory16, U32, U64, U64, U16, AccType )
A64OPC(ExclusiveWriteMemory32, U32, U64, U64, U32, AccType )
A64OPC(ExclusiveWriteMemory64, U32, U64, U64, U64, AccType )
A64OPC(ExclusiveWriteMemory128, U32, U64, U64, U128, AccType )
// Coprocessor
A32OPC(CoprocInternalOperation, Void, CoprocInfo )

View file

@ -25,7 +25,7 @@ void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
break;
}
const u32 vaddr = inst.GetArg(0).GetU32();
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u8 value_from_memory = cb->MemoryRead8(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
@ -37,7 +37,7 @@ void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
break;
}
const u32 vaddr = inst.GetArg(0).GetU32();
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u16 value_from_memory = cb->MemoryRead16(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
@ -49,7 +49,7 @@ void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
break;
}
const u32 vaddr = inst.GetArg(0).GetU32();
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u32 value_from_memory = cb->MemoryRead32(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
@ -61,7 +61,7 @@ void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
break;
}
const u32 vaddr = inst.GetArg(0).GetU32();
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u64 value_from_memory = cb->MemoryRead64(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});

View file

@ -398,9 +398,8 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
}
if (uni.GetRegisters()[15] > jit.Regs()[15]) {
const u32 final_pc = jit.Regs()[15];
int trials = 0;
while (final_pc >= initial_pc && final_pc < expected_end_pc && trials++ < 100) {
while (jit.Regs()[15] >= initial_pc && jit.Regs()[15] < expected_end_pc && trials++ < 100 && uni.GetRegisters()[15] != jit.Regs()[15]) {
fmt::print("Warning: Possible unicorn overrrun, attempt recovery\n");
jit.Step();
}

View file

@ -545,7 +545,7 @@ CubebSink::CubebSink(std::string_view target_device_name) {
}
cubeb_get_max_channel_count(ctx, &device_channels);
device_channels = std::clamp(device_channels, 2U, 6U);
device_channels = device_channels >= 6U ? 6U : 2U;
}
CubebSink::~CubebSink() {

View file

@ -18,14 +18,16 @@
/// Helper macros to insert unused bytes or words to properly align structs. These values will be
/// zero-initialized.
#define INSERT_PADDING_BYTES(num_bytes) \
std::array<u8, num_bytes> CONCAT2(pad, __LINE__) {}
[[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__) {}
#define INSERT_PADDING_WORDS(num_words) \
std::array<u32, num_words> CONCAT2(pad, __LINE__) {}
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__) {}
/// These are similar to the INSERT_PADDING_* macros but do not zero-initialize the contents.
/// This keeps the structure trivial to construct.
#define INSERT_PADDING_BYTES_NOINIT(num_bytes) std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
#define INSERT_PADDING_WORDS_NOINIT(num_words) std::array<u32, num_words> CONCAT2(pad, __LINE__)
#define INSERT_PADDING_BYTES_NOINIT(num_bytes) \
[[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
#define INSERT_PADDING_WORDS_NOINIT(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
#ifndef _MSC_VER

View file

@ -147,7 +147,6 @@ void ARM_Interface::Run() {
// Notify the debugger and go to sleep if a watchpoint was hit.
if (Has(hr, watchpoint)) {
RewindBreakpointInstruction();
if (system.DebuggerEnabled()) {
system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint());
}

View file

@ -203,7 +203,7 @@ public:
static constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
static constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
static constexpr Dynarmic::HaltReason breakpoint = Dynarmic::HaltReason::UserDefined4;
static constexpr Dynarmic::HaltReason watchpoint = Dynarmic::HaltReason::UserDefined5;
static constexpr Dynarmic::HaltReason watchpoint = Dynarmic::HaltReason::MemoryAbort;
static constexpr Dynarmic::HaltReason no_execute = Dynarmic::HaltReason::UserDefined6;
protected:

View file

@ -162,7 +162,7 @@ public:
const auto match{parent.MatchingWatchpoint(addr, size, type)};
if (match) {
parent.halted_watchpoint = match;
ReturnException(parent.jit.load()->Regs()[15], ARM_Interface::watchpoint);
parent.jit.load()->HaltExecution(ARM_Interface::watchpoint);
return false;
}
@ -211,7 +211,6 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
// Code cache size
config.code_cache_size = 512_MiB;
config.far_code_offset = 400_MiB;
// Allow memory fault handling to work
if (system.DebuggerEnabled()) {
@ -222,7 +221,6 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (!page_table) {
// Don't waste too much memory on null_jit
config.code_cache_size = 8_MiB;
config.far_code_offset = 4_MiB;
}
// Safe optimizations

View file

@ -205,7 +205,7 @@ public:
const auto match{parent.MatchingWatchpoint(addr, size, type)};
if (match) {
parent.halted_watchpoint = match;
ReturnException(parent.jit.load()->GetPC(), ARM_Interface::watchpoint);
parent.jit.load()->HaltExecution(ARM_Interface::watchpoint);
return false;
}
@ -271,7 +271,6 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
// Code cache size
config.code_cache_size = 512_MiB;
config.far_code_offset = 400_MiB;
// Allow memory fault handling to work
if (system.DebuggerEnabled()) {
@ -282,7 +281,6 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (!page_table) {
// Don't waste too much memory on null_jit
config.code_cache_size = 8_MiB;
config.far_code_offset = 4_MiB;
}
// Safe optimizations

View file

@ -30,19 +30,19 @@ public:
explicit KCodeMemory(KernelCore& kernel_);
Result Initialize(Core::DeviceMemory& device_memory, VAddr address, size_t size);
void Finalize();
void Finalize() override;
Result Map(VAddr address, size_t size);
Result Unmap(VAddr address, size_t size);
Result MapToOwner(VAddr address, size_t size, Svc::MemoryPermission perm);
Result UnmapFromOwner(VAddr address, size_t size);
bool IsInitialized() const {
bool IsInitialized() const override {
return m_is_initialized;
}
static void PostDestroy([[maybe_unused]] uintptr_t arg) {}
KProcess* GetOwner() const {
KProcess* GetOwner() const override {
return m_owner;
}
VAddr GetSourceAddress() const {

View file

@ -642,6 +642,10 @@ void AppletMessageQueue::RequestExit() {
PushMessage(AppletMessage::Exit);
}
// Queues an AppletMessage::Resume message by forwarding it to PushMessage().
void AppletMessageQueue::RequestResume() {
PushMessage(AppletMessage::Resume);
}
void AppletMessageQueue::FocusStateChanged() {
PushMessage(AppletMessage::FocusStateChanged);
}

View file

@ -90,6 +90,7 @@ public:
AppletMessage PopMessage();
std::size_t GetMessageCount() const;
void RequestExit();
void RequestResume();
void FocusStateChanged();
void OperationModeChanged();

View file

@ -34,6 +34,7 @@ enum class TransactionId {
class IBinder {
public:
virtual ~IBinder() = default;
virtual void Transact(Kernel::HLERequestContext& ctx, android::TransactionId code,
u32 flags) = 0;
virtual Kernel::KReadableEvent& GetNativeHandle() = 0;

View file

@ -30,8 +30,6 @@ add_executable(yuzu
applets/qt_web_browser_scripts.h
bootmanager.cpp
bootmanager.h
check_vulkan.cpp
check_vulkan.h
compatdb.ui
compatibility_list.cpp
compatibility_list.h
@ -155,6 +153,8 @@ add_executable(yuzu
main.cpp
main.h
main.ui
startup_checks.cpp
startup_checks.h
uisettings.cpp
uisettings.h
util/controller_navigation.cpp

View file

@ -683,12 +683,6 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.bg_green);
ReadGlobalSetting(Settings::values.bg_blue);
if (!global && UISettings::values.has_broken_vulkan &&
Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::Vulkan &&
!Settings::values.renderer_backend.UsingGlobal()) {
Settings::values.renderer_backend.SetGlobal(true);
}
if (global) {
ReadBasicSetting(Settings::values.renderer_debug);
ReadBasicSetting(Settings::values.renderer_shader_feedback);
@ -808,7 +802,6 @@ void Config::ReadUIValues() {
ReadBasicSetting(UISettings::values.pause_when_in_background);
ReadBasicSetting(UISettings::values.mute_when_in_background);
ReadBasicSetting(UISettings::values.hide_mouse);
ReadBasicSetting(UISettings::values.has_broken_vulkan);
ReadBasicSetting(UISettings::values.disable_web_applet);
qt_config->endGroup();
@ -1357,7 +1350,6 @@ void Config::SaveUIValues() {
WriteBasicSetting(UISettings::values.pause_when_in_background);
WriteBasicSetting(UISettings::values.mute_when_in_background);
WriteBasicSetting(UISettings::values.hide_mouse);
WriteBasicSetting(UISettings::values.has_broken_vulkan);
WriteBasicSetting(UISettings::values.disable_web_applet);
qt_config->endGroup();

View file

@ -58,24 +58,9 @@ ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* paren
UpdateBackgroundColorButton(new_bg_color);
});
connect(ui->button_check_vulkan, &QAbstractButton::clicked, this, [this] {
UISettings::values.has_broken_vulkan = false;
if (RetrieveVulkanDevices()) {
ui->api->setEnabled(true);
ui->button_check_vulkan->hide();
for (const auto& device : vulkan_devices) {
ui->device->addItem(device);
}
} else {
UISettings::values.has_broken_vulkan = true;
}
});
ui->api->setEnabled(!UISettings::values.has_broken_vulkan.GetValue());
ui->button_check_vulkan->setVisible(UISettings::values.has_broken_vulkan.GetValue());
ui->api->setEnabled(!UISettings::values.has_broken_vulkan);
ui->api_widget->setEnabled(!UISettings::values.has_broken_vulkan ||
Settings::IsConfiguringGlobal());
ui->bg_label->setVisible(Settings::IsConfiguringGlobal());
ui->bg_combobox->setVisible(!Settings::IsConfiguringGlobal());
}
@ -315,7 +300,7 @@ void ConfigureGraphics::UpdateAPILayout() {
vulkan_device = Settings::values.vulkan_device.GetValue(true);
shader_backend = Settings::values.shader_backend.GetValue(true);
ui->device_widget->setEnabled(false);
ui->backend_widget->setEnabled(UISettings::values.has_broken_vulkan.GetValue());
ui->backend_widget->setEnabled(false);
} else {
vulkan_device = Settings::values.vulkan_device.GetValue();
shader_backend = Settings::values.shader_backend.GetValue();
@ -337,9 +322,9 @@ void ConfigureGraphics::UpdateAPILayout() {
}
}
bool ConfigureGraphics::RetrieveVulkanDevices() try {
void ConfigureGraphics::RetrieveVulkanDevices() try {
if (UISettings::values.has_broken_vulkan) {
return false;
return;
}
using namespace Vulkan;
@ -355,11 +340,8 @@ bool ConfigureGraphics::RetrieveVulkanDevices() try {
const std::string name = vk::PhysicalDevice(device, dld).GetProperties().deviceName;
vulkan_devices.push_back(QString::fromStdString(name));
}
return true;
} catch (const Vulkan::vk::Exception& exception) {
LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
return false;
}
Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
@ -440,11 +422,4 @@ void ConfigureGraphics::SetupPerGameUI() {
ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true)));
ConfigurationShared::InsertGlobalItem(
ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true)));
if (UISettings::values.has_broken_vulkan) {
ui->backend_widget->setEnabled(true);
ConfigurationShared::SetColoredComboBox(
ui->backend, ui->backend_widget,
static_cast<int>(Settings::values.shader_backend.GetValue(true)));
}
}

View file

@ -41,7 +41,7 @@ private:
void UpdateDeviceSelection(int device);
void UpdateShaderBackendSelection(int backend);
bool RetrieveVulkanDevices();
void RetrieveVulkanDevices();
void SetupPerGameUI();

View file

@ -6,7 +6,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>471</width>
<width>541</width>
<height>759</height>
</rect>
</property>
@ -574,13 +574,6 @@
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="button_check_vulkan">
<property name="text">
<string>Check for Working Vulkan</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>

View file

@ -116,7 +116,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "video_core/shader_notify.h"
#include "yuzu/about_dialog.h"
#include "yuzu/bootmanager.h"
#include "yuzu/check_vulkan.h"
#include "yuzu/compatdb.h"
#include "yuzu/compatibility_list.h"
#include "yuzu/configuration/config.h"
@ -132,6 +131,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "yuzu/install_dialog.h"
#include "yuzu/loading_screen.h"
#include "yuzu/main.h"
#include "yuzu/startup_checks.h"
#include "yuzu/uisettings.h"
using namespace Common::Literals;
@ -253,7 +253,7 @@ static QString PrettyProductName() {
return QSysInfo::prettyProductName();
}
GMainWindow::GMainWindow()
GMainWindow::GMainWindow(bool has_broken_vulkan)
: ui{std::make_unique<Ui::MainWindow>()}, system{std::make_unique<Core::System>()},
input_subsystem{std::make_shared<InputCommon::InputSubsystem>()},
config{std::make_unique<Config>(*system)},
@ -353,17 +353,15 @@ GMainWindow::GMainWindow()
MigrateConfigFiles();
if (!CheckVulkan()) {
config->Save();
if (has_broken_vulkan) {
UISettings::values.has_broken_vulkan = true;
QMessageBox::warning(this, tr("Broken Vulkan Installation Detected"),
tr("Vulkan initialization failed during boot.<br><br>Click <a "
"href='https://yuzu-emu.org/wiki/faq/"
"#yuzu-starts-with-the-error-broken-vulkan-installation-detected'>"
"here for instructions to fix the issue</a>."));
QMessageBox::warning(
this, tr("Broken Vulkan Installation Detected"),
tr("Vulkan initialization failed on the previous boot.<br><br>Click <a "
"href='https://yuzu-emu.org/wiki/faq/"
"#yuzu-starts-with-the-error-broken-vulkan-installation-detected'>here for "
"instructions to fix the issue</a>."));
}
if (UISettings::values.has_broken_vulkan) {
Settings::values.renderer_backend = Settings::RendererBackend::OpenGL;
renderer_status_button->setDisabled(true);
@ -2577,6 +2575,7 @@ void GMainWindow::OnPauseContinueGame() {
if (emu_thread->IsRunning()) {
OnPauseGame();
} else {
RequestGameResume();
OnStartGame();
}
}
@ -3757,6 +3756,21 @@ void GMainWindow::RequestGameExit() {
}
}
// Asks the running title to resume by posting a resume request on the applet message queue.
// The appletOE service is preferred; appletAE is used only when appletOE is not available.
// Does nothing when neither service can be found.
void GMainWindow::RequestGameResume() {
    auto& service_manager = system->ServiceManager();

    // Both services are looked up before deciding which one to notify.
    auto oe_service = service_manager.GetService<Service::AM::AppletOE>("appletOE");
    auto ae_service = service_manager.GetService<Service::AM::AppletAE>("appletAE");

    if (oe_service != nullptr) {
        oe_service->GetMessageQueue()->RequestResume();
    } else if (ae_service != nullptr) {
        ae_service->GetMessageQueue()->RequestResume();
    }
}
void GMainWindow::filterBarSetChecked(bool state) {
ui->action_Show_Filter_Bar->setChecked(state);
emit(OnToggleFilterBar());
@ -3858,6 +3872,11 @@ void GMainWindow::SetDiscordEnabled([[maybe_unused]] bool state) {
#endif
int main(int argc, char* argv[]) {
bool has_broken_vulkan = false;
if (StartupChecks(argv[0], &has_broken_vulkan)) {
return 0;
}
Common::DetachedTasks detached_tasks;
MicroProfileOnThreadCreate("Frontend");
SCOPE_EXIT({ MicroProfileShutdown(); });
@ -3897,7 +3916,7 @@ int main(int argc, char* argv[]) {
// generating shaders
setlocale(LC_ALL, "C");
GMainWindow main_window{};
GMainWindow main_window{has_broken_vulkan};
// After settings have been loaded by GMainWindow, apply the filter
main_window.show();

View file

@ -118,7 +118,7 @@ class GMainWindow : public QMainWindow {
public:
void filterBarSetChecked(bool state);
void UpdateUITheme();
explicit GMainWindow();
explicit GMainWindow(bool has_broken_vulkan);
~GMainWindow() override;
bool DropAction(QDropEvent* event);
@ -244,6 +244,7 @@ private:
bool ConfirmChangeGame();
bool ConfirmForceLockedExit();
void RequestGameExit();
void RequestGameResume();
void closeEvent(QCloseEvent* event) override;
private slots:

136
src/yuzu/startup_checks.cpp Executable file
View file

@ -0,0 +1,136 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/vulkan_common/vulkan_wrapper.h"
#ifdef _WIN32
#include <cstring> // for memset, strncpy
#include <processthreadsapi.h>
#include <windows.h>
#elif defined(YUZU_UNIX)
#include <errno.h>
#include <sys/wait.h>
#include <unistd.h>
#endif
#include <cstdio>
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "yuzu/startup_checks.h"
void CheckVulkan() {
// Just start the Vulkan loader, this will crash if something is wrong
try {
Vulkan::vk::InstanceDispatch dld;
const Common::DynamicLibrary library = Vulkan::OpenLibrary();
const Vulkan::vk::Instance instance =
Vulkan::CreateInstance(library, dld, VK_API_VERSION_1_0);
} catch (const Vulkan::vk::Exception& exception) {
std::fprintf(stderr, "Failed to initialize Vulkan: %s\n", exception.what());
}
}
// Runs CheckVulkan() in a separate process so that a crash inside the Vulkan loader or driver
// cannot take the main process down with it.
//
// arg0              - path of the running executable; re-launched as the child on Windows.
// has_broken_vulkan - written with true when the child finished with a non-zero status and with
//                     false when it finished with zero. Left untouched on every early-return
//                     path and on platforms where no check is implemented.
//
// Returns true when the calling process is the child that performed the check, false in the
// parent (whether or not the check could be run).
bool StartupChecks(const char* arg0, bool* has_broken_vulkan) {
#ifdef _WIN32
    // Check environment variable to see if we are the child
    constexpr DWORD variable_buffer_size = 8;
    char variable_contents[variable_buffer_size];
    const DWORD startup_check_var =
        GetEnvironmentVariableA(STARTUP_CHECK_ENV_VAR, variable_contents, variable_buffer_size);
    // A result >= the buffer size means the value did not fit and the buffer contents are
    // undefined, so the buffer may only be inspected when the whole value was copied.
    if (startup_check_var > 0 && startup_check_var < variable_buffer_size &&
        std::strncmp(variable_contents, "ON", variable_buffer_size) == 0) {
        CheckVulkan();
        return true;
    }

    // Set the startup variable for child processes
    const bool env_var_set = SetEnvironmentVariableA(STARTUP_CHECK_ENV_VAR, "ON");
    if (!env_var_set) {
        std::fprintf(stderr, "SetEnvironmentVariableA failed to set %s with error %lu\n",
                     STARTUP_CHECK_ENV_VAR, GetLastError());
        return false;
    }

    PROCESS_INFORMATION process_info;
    std::memset(&process_info, '\0', sizeof(process_info));

    const bool child_spawned = SpawnChild(arg0, &process_info);

    // The child received its own copy of the environment when it was created, so the marker can
    // be removed from this process right away. Doing it here also covers the spawn-failure path,
    // which previously left the variable set for the rest of the parent's lifetime.
    if (!SetEnvironmentVariableA(STARTUP_CHECK_ENV_VAR, nullptr)) {
        std::fprintf(stderr, "SetEnvironmentVariableA failed to clear %s with error %lu\n",
                     STARTUP_CHECK_ENV_VAR, GetLastError());
    }

    if (!child_spawned) {
        return false;
    }

    // Wait until the process exits and get exit code from it
    WaitForSingleObject(process_info.hProcess, INFINITE);
    DWORD exit_code = STILL_ACTIVE;
    const int err = GetExitCodeProcess(process_info.hProcess, &exit_code);
    if (err == 0) {
        std::fprintf(stderr, "GetExitCodeProcess failed with error %lu\n", GetLastError());
    }

    // Vulkan is broken if the child crashed (return value is not zero)
    *has_broken_vulkan = (exit_code != 0);

    if (CloseHandle(process_info.hProcess) == 0) {
        std::fprintf(stderr, "CloseHandle failed with error %lu\n", GetLastError());
    }
    if (CloseHandle(process_info.hThread) == 0) {
        std::fprintf(stderr, "CloseHandle failed with error %lu\n", GetLastError());
    }

#elif defined(YUZU_UNIX)
    const pid_t pid = fork();
    if (pid == 0) {
        CheckVulkan();
        return true;
    } else if (pid == -1) {
        const int err = errno;
        std::fprintf(stderr, "fork failed with error %d\n", err);
        return false;
    }

    // Get exit code from child process. Wait for the forked child specifically and retry when
    // the call is interrupted by a signal, so an unrelated child or a stray signal cannot skew
    // the result.
    int status = 0;
    pid_t waited_pid;
    do {
        waited_pid = waitpid(pid, &status, 0);
    } while (waited_pid == -1 && errno == EINTR);
    if (waited_pid == -1) {
        const int err = errno;
        std::fprintf(stderr, "wait failed with error %d\n", err);
        return false;
    }

    // Vulkan is broken if the child crashed (return value is not zero)
    *has_broken_vulkan = (status != 0);

#endif
    return false;
}
#ifdef _WIN32
bool SpawnChild(const char* arg0, PROCESS_INFORMATION* pi) {
STARTUPINFOA startup_info;
std::memset(&startup_info, '\0', sizeof(startup_info));
startup_info.cb = sizeof(startup_info);
char p_name[255];
std::strncpy(p_name, arg0, 255);
const bool process_created = CreateProcessA(nullptr, // lpApplicationName
p_name, // lpCommandLine
nullptr, // lpProcessAttributes
nullptr, // lpThreadAttributes
false, // bInheritHandles
0, // dwCreationFlags
nullptr, // lpEnvironment
nullptr, // lpCurrentDirectory
&startup_info, // lpStartupInfo
pi // lpProcessInformation
);
if (!process_created) {
std::fprintf(stderr, "CreateProcessA failed with error %d\n", GetLastError());
return false;
}
return true;
}
#endif

17
src/yuzu/startup_checks.h Executable file
View file

@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#ifdef _WIN32
#include <windows.h>
#endif
// Name of the environment variable that StartupChecks() sets to "ON" on Windows before spawning
// the child process, and that the child reads to recognise itself.
constexpr char STARTUP_CHECK_ENV_VAR[] = "YUZU_DO_STARTUP_CHECKS";
// Opens the Vulkan library and creates a Vulkan instance; a Vulkan exception is only logged to
// stderr.
void CheckVulkan();
// Runs CheckVulkan() in a child process. Returns true in the child, false in the parent.
// In the parent, *has_broken_vulkan is set from the child's exit status (non-zero means broken).
bool StartupChecks(const char* arg0, bool* has_broken_vulkan);
#ifdef _WIN32
// Starts arg0 as a new process via CreateProcessA and fills *pi; returns false when that fails.
bool SpawnChild(const char* arg0, PROCESS_INFORMATION* pi);
#endif

View file

@ -78,7 +78,7 @@ struct Values {
Settings::Setting<bool> mute_when_in_background{false, "muteWhenInBackground"};
Settings::Setting<bool> hide_mouse{true, "hideInactiveMouse"};
// Set when Vulkan is known to crash the application
Settings::Setting<bool> has_broken_vulkan{false, "has_broken_vulkan"};
bool has_broken_vulkan = false;
Settings::Setting<bool> select_user_on_boot{false, "select_user_on_boot"};