early-access version 3275

This commit is contained in:
pineappleEA 2023-01-03 22:43:57 +01:00
parent 4b11185ae8
commit 0f64bad6bd
26 changed files with 350 additions and 160 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 3274.
This is the source code for early-access 3275.
## Legal Notice

View file

@ -11,6 +11,11 @@
namespace Core::HID {
constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
constexpr s32 HID_TRIGGER_MAX = 0x7fff;
// Use a common UUID for TAS and Virtual Gamepad
constexpr Common::UUID TAS_UUID =
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
constexpr Common::UUID VIRTUAL_UUID =
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {}
@ -392,10 +397,6 @@ void EmulatedController::ReloadInput() {
nfc_devices[index]->ForceUpdate();
}
// Use a common UUID for TAS
static constexpr Common::UUID TAS_UUID = Common::UUID{
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
// Register TAS devices. No need to force update
for (std::size_t index = 0; index < tas_button_devices.size(); ++index) {
if (!tas_button_devices[index]) {
@ -421,10 +422,6 @@ void EmulatedController::ReloadInput() {
});
}
// Use a common UUID for Virtual Gamepad
static constexpr Common::UUID VIRTUAL_UUID = Common::UUID{
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
// Register virtual devices. No need to force update
for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) {
if (!virtual_button_devices[index]) {
@ -842,7 +839,12 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback,
// Only read stick values that have the same uuid or are over the threshold to avoid flapping
if (controller.stick_values[index].uuid != uuid) {
if (!stick_value.down && !stick_value.up && !stick_value.left && !stick_value.right) {
const bool is_tas = uuid == TAS_UUID;
if (is_tas && stick_value.x.value == 0 && stick_value.y.value == 0) {
return;
}
if (!is_tas && !stick_value.down && !stick_value.up && !stick_value.left &&
!stick_value.right) {
return;
}
}

View file

@ -74,8 +74,6 @@ void IRS::DeactivateIrsensor(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_IRS, "(STUBBED) called, applet_resource_user_id={}",
applet_resource_user_id);
npad_device->SetPollingMode(Common::Input::PollingMode::Active);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
@ -514,7 +512,7 @@ void IRS::StopImageProcessorAsync(Kernel::HLERequestContext& ctx) {
auto result = IsIrCameraHandleValid(parameters.camera_handle);
if (result.IsSuccess()) {
// TODO: Stop image processor async
npad_device->SetPollingMode(Common::Input::PollingMode::IR);
npad_device->SetPollingMode(Common::Input::PollingMode::Active);
result = ResultSuccess;
}

View file

@ -264,6 +264,16 @@ DriverResult JoyconDriver::SetPollingMode() {
irs_protocol->DisableIrs();
}
if (nfc_protocol->IsEnabled()) {
amiibo_detected = false;
nfc_protocol->DisableNfc();
}
if (ring_protocol->IsEnabled()) {
ring_connected = false;
ring_protocol->DisableRingCon();
}
if (irs_enabled && supported_features.irs) {
auto result = irs_protocol->EnableIrs();
if (result == DriverResult::Success) {
@ -274,11 +284,6 @@ DriverResult JoyconDriver::SetPollingMode() {
LOG_ERROR(Input, "Error enabling IRS");
}
if (nfc_protocol->IsEnabled()) {
amiibo_detected = false;
nfc_protocol->DisableNfc();
}
if (nfc_enabled && supported_features.nfc) {
auto result = nfc_protocol->EnableNfc();
if (result == DriverResult::Success) {
@ -292,11 +297,6 @@ DriverResult JoyconDriver::SetPollingMode() {
LOG_ERROR(Input, "Error enabling NFC");
}
if (ring_protocol->IsEnabled()) {
ring_connected = false;
ring_protocol->DisableRingCon();
}
if (hidbus_enabled && supported_features.hidbus) {
auto result = ring_protocol->EnableRingCon();
if (result == DriverResult::Success) {
@ -428,6 +428,12 @@ DriverResult JoyconDriver::SetPasiveMode() {
}
DriverResult JoyconDriver::SetActiveMode() {
if (is_ring_disabled_by_irs) {
is_ring_disabled_by_irs = false;
SetActiveMode();
return SetRingConMode();
}
std::scoped_lock lock{mutex};
motion_enabled = true;
hidbus_enabled = false;
@ -444,6 +450,10 @@ DriverResult JoyconDriver::SetIrMode() {
return DriverResult::NotSupported;
}
if (ring_connected) {
is_ring_disabled_by_irs = true;
}
motion_enabled = false;
hidbus_enabled = false;
nfc_enabled = false;

View file

@ -108,6 +108,7 @@ private:
bool starlink_connected{};
bool ring_connected{};
bool amiibo_detected{};
bool is_ring_disabled_by_irs{};
// Harware configuration
u8 leds{};

View file

@ -74,8 +74,8 @@ DriverResult JoyconCommonProtocol::SendData(std::span<const u8> buffer) {
}
DriverResult JoyconCommonProtocol::GetSubCommandResponse(SubCommand sc, std::vector<u8>& output) {
constexpr int timeout_mili = 100;
constexpr int MaxTries = 10;
constexpr int timeout_mili = 66;
constexpr int MaxTries = 15;
int tries = 0;
output.resize(MaxSubCommandResponseSize);

View file

@ -57,11 +57,16 @@ public:
return start_address;
}
[[nodiscard]] bool IsPropietaryDriver() const noexcept {
return is_propietary_driver;
}
protected:
ProgramHeader sph{};
std::array<u32, 8> gp_passthrough_mask{};
Stage stage{};
u32 start_address{};
bool is_propietary_driver{};
};
} // namespace Shader

View file

@ -677,6 +677,30 @@ void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
}
}
void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank,
u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) {
const IR::Value bank{inst.Arg(0)};
const IR::Value offset{inst.Arg(1)};
if (!bank.IsImmediate() || !offset.IsImmediate()) {
return;
}
const auto bank_value = bank.U32();
if (bank_value != which_bank) {
return;
}
const auto offset_value = offset.U32();
if (offset_value < offset_start || offset_value >= offset_end) {
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)});
} else {
inst.ReplaceUsesWith(
IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))});
}
}
void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetRegister:
@ -825,13 +849,17 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
case IR::Opcode::GetCbufF32:
case IR::Opcode::GetCbufU32:
if (env.HasHLEMacroState()) {
return FoldConstBuffer(env, block, inst);
FoldConstBuffer(env, block, inst);
}
if (env.IsPropietaryDriver()) {
FoldDriverConstBuffer(env, block, inst, 1);
}
break;
default:
break;
}
}
} // Anonymous namespace
void ConstantPropagationPass(Environment& env, IR::Program& program) {

View file

@ -94,7 +94,7 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
draw_state.topology = topology;
ProcessDrawIndirect(true);
ProcessDrawIndirect();
}
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
@ -105,7 +105,7 @@ void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_firs
draw_state.index_buffer.first = index_first;
draw_state.index_buffer.count = index_count;
ProcessDrawIndirect(true);
ProcessDrawIndirect();
}
void DrawManager::SetInlineIndexBuffer(u32 index) {
@ -216,9 +216,12 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
}
}
void DrawManager::ProcessDrawIndirect(bool draw_indexed) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology,
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
void DrawManager::ProcessDrawIndirect() {
LOG_TRACE(
HW_GPU,
"called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
indirect_state.buffer_size, indirect_state.max_draw_counts);
UpdateTopology();

View file

@ -85,7 +85,7 @@ private:
void ProcessDraw(bool draw_indexed, u32 instance_count);
void ProcessDrawIndirect(bool draw_indexed);
void ProcessDrawIndirect();
Maxwell3D* maxwell3d{};
State draw_state{};

View file

@ -685,7 +685,8 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) {
void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
HLEReplacementAttributeType name) {
const u64 key = (static_cast<u64>(bank) << 32) | offset;
replace_table.emplace(key, name);
}

View file

@ -2711,7 +2711,7 @@ public:
u32 post_z_pixel_imask; ///< 0x0F1C
INSERT_PADDING_BYTES_NOINIT(0x20);
ConstantColorRendering const_color_rendering; ///< 0x0F40
s32 stencil_back_ref; ///< 0x0F54
u32 stencil_back_ref; ///< 0x0F54
u32 stencil_back_mask; ///< 0x0F58
u32 stencil_back_func_mask; ///< 0x0F5C
INSERT_PADDING_BYTES_NOINIT(0x14);
@ -2835,9 +2835,9 @@ public:
Blend blend; ///< 0x133C
u32 stencil_enable; ///< 0x1380
StencilOp stencil_front_op; ///< 0x1384
s32 stencil_front_ref; ///< 0x1394
s32 stencil_front_func_mask; ///< 0x1398
s32 stencil_front_mask; ///< 0x139C
u32 stencil_front_ref; ///< 0x1394
u32 stencil_front_func_mask; ///< 0x1398
u32 stencil_front_mask; ///< 0x139C
INSERT_PADDING_BYTES_NOINIT(0x4);
u32 draw_auto_start_byte_count; ///< 0x13A4
PsSaturate frag_color_clamp; ///< 0x13A8
@ -3031,14 +3031,14 @@ public:
EngineHint engine_state{EngineHint::None};
enum class HLEReplaceName : u32 {
enum class HLEReplacementAttributeType : u32 {
BaseVertex = 0x0,
BaseInstance = 0x1,
};
void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name);
void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
std::unordered_map<u64, HLEReplaceName> replace_table;
std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@ -3089,7 +3089,7 @@ public:
std::vector<u8> inline_index_draw_indexes;
GPUVAddr getMacroAddress(size_t index) const {
GPUVAddr GetMacroAddress(size_t index) const {
return macro_addresses[index];
}
@ -3100,7 +3100,7 @@ public:
RefreshParametersImpl();
}
bool AnyParametersDirty() {
bool AnyParametersDirty() const {
return current_macro_dirty;
}
@ -3196,11 +3196,6 @@ private:
bool execute_on{true};
std::array<bool, Regs::NUM_REGS> draw_command{};
std::vector<u32> deferred_draw_method;
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
DrawMode draw_mode{DrawMode::General};
bool draw_indexed{};
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
std::vector<GPUVAddr> macro_addresses;
bool current_macro_dirty{};

View file

@ -81,14 +81,15 @@ public:
params.is_indexed = false;
params.include_count = false;
params.count_start_address = 0;
params.indirect_start_address = maxwell3d.getMacroAddress(1);
params.indirect_start_address = maxwell3d.GetMacroAddress(1);
params.buffer_size = 4 * sizeof(u32);
params.max_draw_counts = 1;
params.stride = 0;
if (extended) {
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x640, Maxwell::HLEReplacementAttributeType::BaseInstance);
}
maxwell3d.draw_manager->DrawArrayIndirect(topology);
@ -125,7 +126,8 @@ private:
if (extended) {
maxwell3d.regs.global_base_instance_index = base_instance;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x640, Maxwell::HLEReplacementAttributeType::BaseInstance);
}
maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance,
@ -160,13 +162,15 @@ public:
maxwell3d.regs.global_base_instance_index = base_instance;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
Maxwell::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_indexed = true;
params.include_count = false;
params.count_start_address = 0;
params.indirect_start_address = maxwell3d.getMacroAddress(1);
params.indirect_start_address = maxwell3d.GetMacroAddress(1);
params.buffer_size = 5 * sizeof(u32);
params.max_draw_counts = 1;
params.stride = 0;
@ -190,8 +194,10 @@ private:
maxwell3d.regs.global_base_instance_index = base_instance;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
Maxwell::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
maxwell3d.draw_manager->DrawIndex(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
@ -253,15 +259,17 @@ public:
auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_indexed = true;
params.include_count = true;
params.count_start_address = maxwell3d.getMacroAddress(4);
params.indirect_start_address = maxwell3d.getMacroAddress(5);
params.count_start_address = maxwell3d.GetMacroAddress(4);
params.indirect_start_address = maxwell3d.GetMacroAddress(5);
params.buffer_size = stride * draw_count;
params.max_draw_counts = draw_count;
params.stride = stride;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
Maxwell::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
maxwell3d.engine_state = Maxwell::EngineHint::None;
maxwell3d.replace_table.clear();
@ -298,8 +306,10 @@ private:
const u32 base_instance = parameters[base + 4];
maxwell3d.regs.vertex_id_base = base_vertex;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x640, Maxwell::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
base_vertex, base_instance, parameters[base + 1]);

View file

@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines;
using VideoCommon::SerializePipeline;
using Context = ShaderContext::Context;
constexpr u32 CACHE_VERSION = 8;
constexpr u32 CACHE_VERSION = 9;
template <typename Container>
auto MakeSpan(Container& container) {

View file

@ -354,6 +354,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
@ -361,6 +362,7 @@ struct TextureCacheParams {
using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
using AsyncBuffer = u32;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;

View file

@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
constexpr u32 CACHE_VERSION = 9;
constexpr u32 CACHE_VERSION = 10;
template <typename Container>
auto MakeSpan(Container& container) {

View file

@ -886,30 +886,52 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
if (!state_tracker.TouchStencilProperties()) {
return;
}
if (regs.stencil_two_side_enable) {
// Separate values per face
scheduler.Record(
[front_ref = regs.stencil_front_ref, front_write_mask = regs.stencil_front_mask,
front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_ref,
back_write_mask = regs.stencil_back_mask,
back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) {
// Front face
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref);
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask);
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask);
// Back face
bool update_references = state_tracker.TouchStencilReference();
bool update_write_mask = state_tracker.TouchStencilWriteMask();
bool update_compare_masks = state_tracker.TouchStencilCompare();
if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
update_references = true;
update_write_mask = true;
update_compare_masks = true;
}
if (update_references) {
scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_ref != back_ref;
// Front face
cmdbuf.SetStencilReference(
set_back ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FACE_FRONT_AND_BACK, front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
});
}
if (update_write_mask) {
scheduler.Record([front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_write_mask != back_write_mask;
// Front face
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
});
}
if (update_compare_masks) {
scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_test_mask != back_test_mask;
// Front face
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
});
} else {
// Front face defines both faces
scheduler.Record([ref = regs.stencil_front_ref, write_mask = regs.stencil_front_mask,
test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref);
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask);
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask);
}
});
}
}
@ -990,7 +1012,7 @@ void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& re
constexpr size_t POINT = 0;
constexpr size_t LINE = 1;
constexpr size_t POLYGON = 2;
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points
LINE, // Lines
LINE, // LineLoop
@ -1099,13 +1121,12 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
}
void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!regs.logic_op.enable) {
return;
}
if (!state_tracker.TouchLogicOp()) {
return;
}
auto op = static_cast<VkLogicOp>(static_cast<u32>(regs.logic_op.op) - 0x1500);
const auto op_value = static_cast<u32>(regs.logic_op.op);
auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast<VkLogicOp>(op_value - 0x1500)
: VK_LOGIC_OP_NO_OP;
scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); });
}

View file

@ -30,7 +30,8 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
constexpr VkMemoryPropertyFlags HOST_FLAGS =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
bool IsStreamHeap(VkMemoryHeap heap) noexcept {
@ -92,7 +93,9 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.pNext = nullptr,
.flags = 0,
.size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@ -244,19 +247,15 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) {
const u32 log2 = Common::Log2Ceil64(size);
VkBufferUsageFlags usage_flags{};
if (usage == MemoryUsage::Upload) {
usage_flags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
}
if (usage == MemoryUsage::Download) {
usage_flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
}
vk::Buffer buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 1ULL << log2,
.usage = usage_flags,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,

View file

@ -33,6 +33,9 @@ Flags MakeInvalidationFlags() {
BlendConstants,
DepthBounds,
StencilProperties,
StencilReference,
StencilWriteMask,
StencilCompare,
LineWidth,
CullMode,
DepthBoundsEnable,
@ -99,14 +102,17 @@ void SetupDirtyDepthBounds(Tables& tables) {
}
void SetupDirtyStencilProperties(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_two_side_enable)] = StencilProperties;
table[OFF(stencil_front_ref)] = StencilProperties;
table[OFF(stencil_front_mask)] = StencilProperties;
table[OFF(stencil_front_func_mask)] = StencilProperties;
table[OFF(stencil_back_ref)] = StencilProperties;
table[OFF(stencil_back_mask)] = StencilProperties;
table[OFF(stencil_back_func_mask)] = StencilProperties;
const auto setup = [&](size_t position, u8 flag) {
tables[0][position] = flag;
tables[1][position] = StencilProperties;
};
tables[0][OFF(stencil_two_side_enable)] = StencilProperties;
setup(OFF(stencil_front_ref), StencilReference);
setup(OFF(stencil_front_mask), StencilWriteMask);
setup(OFF(stencil_front_func_mask), StencilCompare);
setup(OFF(stencil_back_ref), StencilReference);
setup(OFF(stencil_back_mask), StencilWriteMask);
setup(OFF(stencil_back_func_mask), StencilCompare);
}
void SetupDirtyLineWidth(Tables& tables) {

View file

@ -35,6 +35,9 @@ enum : u8 {
BlendConstants,
DepthBounds,
StencilProperties,
StencilReference,
StencilWriteMask,
StencilCompare,
LineWidth,
CullMode,
@ -113,6 +116,24 @@ public:
return Exchange(Dirty::StencilProperties, false);
}
bool TouchStencilReference() {
return Exchange(Dirty::StencilReference, false);
}
bool TouchStencilWriteMask() {
return Exchange(Dirty::StencilWriteMask, false);
}
bool TouchStencilCompare() {
return Exchange(Dirty::StencilCompare, false);
}
bool TouchStencilSide(bool two_sided_stencil_new) {
bool result = two_sided_stencil != two_sided_stencil_new;
two_sided_stencil = two_sided_stencil_new;
return result;
}
bool TouchLineWidth() const {
return Exchange(Dirty::LineWidth, false);
}
@ -218,6 +239,7 @@ private:
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
bool two_sided_stencil = false;
};
} // namespace Vulkan

View file

@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Upload);
}
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Download);
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred);
}
void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
staging_buffer_pool.FreeDeferred(ref);
}
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {

View file

@ -51,7 +51,9 @@ public:
StagingBufferRef UploadStagingBuffer(size_t size);
StagingBufferRef DownloadStagingBuffer(size_t size);
StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void TickFrame();
@ -347,6 +349,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image;
@ -354,6 +357,7 @@ struct TextureCacheParams {
using ImageView = Vulkan::ImageView;
using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer;
using AsyncBuffer = Vulkan::StagingBufferRef;
};
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;

View file

@ -325,6 +325,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
ASSERT(local_size <= std::numeric_limits<u32>::max());
local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot;
is_propietary_driver = texture_bound == 2;
has_hle_engine_state =
maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro;
}
@ -350,11 +351,11 @@ std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffe
if (it == maxwell3d->replace_table.end()) {
return std::nullopt;
}
const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) {
const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplacementAttributeType name) {
switch (name) {
case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex:
case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseVertex:
return Shader::ReplaceConstant::BaseVertex;
case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance:
case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseInstance:
return Shader::ReplaceConstant::BaseInstance;
default:
UNREACHABLE();
@ -399,6 +400,7 @@ ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_com
stage = Shader::Stage::Compute;
local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
texture_bound = kepler_compute->regs.tex_cb_index;
is_propietary_driver = texture_bound == 2;
shared_memory_size = qmd.shared_alloc;
workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
}
@ -498,6 +500,7 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
}
}
is_propietary_driver = texture_bound == 2;
}
void FileEnvironment::Dump(u64 hash) {

View file

@ -646,7 +646,28 @@ bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
template <class P>
void TextureCache<P>::CommitAsyncFlushes() {
// This is intentionally passing the value by copy
committed_downloads.push(uncommitted_downloads);
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = uncommitted_downloads;
if (download_ids.empty()) {
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();
async_buffers.emplace_back(std::optional<AsyncBuffer>{});
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
async_buffers.emplace_back(download_map);
}
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();
}
@ -655,37 +676,58 @@ void TextureCache<P>::PopAsyncFlushes() {
if (committed_downloads.empty()) {
return;
}
const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop();
return;
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
async_buffers.pop_front();
return;
}
auto download_map = *async_buffers.front();
std::span<u8> download_span = download_map.mapped_span;
for (size_t i = download_ids.size(); i > 0; i--) {
const ImageBase& image = slot_images[download_ids[i - 1]];
const auto copies = FullDownloadCopies(image.info);
download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
swizzle_data_buffer);
}
runtime.FreeDeferredStagingBuffer(download_map);
committed_downloads.pop_front();
async_buffers.pop_front();
} else {
const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;
}
// Wait for downloads to finish
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
committed_downloads.pop_front();
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;
}
// Wait for downloads to finish
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
committed_downloads.pop();
}
template <class P>
@ -1475,6 +1517,27 @@ void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
}
}
template <class P>
void TextureCache<P>::BubbleUpImages(VAddr cpu_addr, size_t size) {
ForEachCPUPage(cpu_addr, size, [this](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
return;
}
std::vector<ImageMapId>& map_vector = it->second;
for (size_t i = 1; i < map_vector.size(); i++) {
ImageMapView& bottom_map = slot_map_views[map_vector[i - 1]];
ImageMapView& top_map = slot_map_views[map_vector[i]];
if (slot_images[bottom_map.image_id].modification_tick <
slot_images[top_map.image_id].modification_tick) {
std::swap(map_vector[i - 1], map_vector[i]);
} else {
return;
}
}
});
}
template <class P>
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
Image& image = slot_images[image_id];
@ -1788,6 +1851,7 @@ template <class P>
void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
image.flags |= ImageFlagBits::GpuModified;
image.modification_tick = ++modification_tick;
BubbleUpImages(image.cpu_addr, image.guest_size_bytes);
}
template <class P>

View file

@ -92,6 +92,8 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// True when the API can do asynchronous texture downloads.
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
@ -106,6 +108,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
using AsyncBuffer = typename P::AsyncBuffer;
struct BlitImages {
ImageId dst_id;
@ -316,6 +319,8 @@ private:
template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func);
void BubbleUpImages(VAddr cpu_addr, size_t size);
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
@ -403,7 +408,8 @@ private:
// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads;
std::deque<std::vector<ImageId>> committed_downloads;
std::deque<std::optional<AsyncBuffer>> async_buffers;
struct LRUItemParams {
using ObjectType = ImageId;

View file

@ -756,20 +756,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
CollectToolingInfo();
if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
switch (arch) {
case NvidiaArchitecture::AmpereOrNewer:
LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
is_float16_supported = false;
break;
case NvidiaArchitecture::Turing:
break;
case NvidiaArchitecture::VoltaOrOlder:
LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor");
khr_push_descriptor = false;
break;
}
const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff;
if (nv_major_version < 527) {
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
switch (arch) {
case NvidiaArchitecture::AmpereOrNewer:
LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
is_float16_supported = false;
break;
case NvidiaArchitecture::Turing:
break;
case NvidiaArchitecture::VoltaOrOlder:
LOG_WARNING(Render_Vulkan,
"Blacklisting Volta and older from VK_KHR_push_descriptor");
khr_push_descriptor = false;
break;
}
}
if (nv_major_version >= 510) {
LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
cant_blit_msaa = true;
@ -834,8 +837,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
if (ext_vertex_input_dynamic_state && is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
const u32 version = (properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
}
}
if (is_float16_supported && is_intel_windows) {
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.